AMDGPU/GlobalISel: Legalize G_PTR_ADD for arbitrary pointers

Pointers of unrecognized address spaces shoudl be treated as
global-like pointers. Even if loads and stores of them aren't handled,
dumb operations that just operate on the bits should work.
This commit is contained in:
Matt Arsenault 2020-01-21 16:07:22 -05:00 committed by Matt Arsenault
parent aa91ce3e1d
commit fd109308a7
3 changed files with 178 additions and 16 deletions

View File

@ -499,8 +499,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
getActionDefinitionsBuilder(G_PTR_ADD)
.legalForCartesianProduct(AddrSpaces64, {S64})
.legalForCartesianProduct(AddrSpaces32, {S32})
.legalIf(isPointer(0))
.scalarize(0);
getActionDefinitionsBuilder(G_PTR_MASK)

View File

@ -438,3 +438,149 @@ body: |
S_ENDPGM 0, implicit %2
...
---
name: gep_p999_sgpr_sgpr
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
; GFX6-LABEL: name: gep_p999_sgpr_sgpr
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX8-LABEL: name: gep_p999_sgpr_sgpr
; GFX8: $vcc_hi = IMPLICIT_DEF
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: gep_p999_sgpr_sgpr
; GFX9: $vcc_hi = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE64-LABEL: name: gep_p999_sgpr_sgpr
; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GFX10-WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
; GFX10-WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-WAVE64: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
; GFX10-WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE32-LABEL: name: gep_p999_sgpr_sgpr
; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GFX10-WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
; GFX10-WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-WAVE32: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
; GFX10-WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(p999) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = COPY $sgpr2_sgpr3
%2:sgpr(p999) = G_PTR_ADD %0, %1
S_ENDPGM 0, implicit %2
...
---
name: gep_p999_vgpr_vgpr
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6-LABEL: name: gep_p999_vgpr_vgpr
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX8-LABEL: name: gep_p999_vgpr_vgpr
; GFX8: $vcc_hi = IMPLICIT_DEF
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: gep_p999_vgpr_vgpr
; GFX9: $vcc_hi = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE64-LABEL: name: gep_p999_vgpr_vgpr
; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE32-LABEL: name: gep_p999_vgpr_vgpr
; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(p999) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(p999) = G_PTR_ADD %0, %1
S_ENDPGM 0, implicit %2
...

View File

@ -10,8 +10,8 @@ body: |
; CHECK-LABEL: name: test_gep_global_i64_idx
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[GEP]](p1)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(p1) = G_PTR_ADD %0, %1
@ -28,8 +28,8 @@ body: |
; CHECK-LABEL: name: test_gep_flat_i64_idx
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[GEP]](p0)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p0)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(p0) = G_PTR_ADD %0, %1
@ -46,8 +46,8 @@ body: |
; CHECK-LABEL: name: test_gep_constant_i64_idx
; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[GEP]](p4)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p4)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(p4) = G_PTR_ADD %0, %1
@ -64,8 +64,8 @@ body: |
; CHECK-LABEL: name: test_gep_local_i32_idx
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[GEP:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[GEP]](p3)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[PTR_ADD]](p3)
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p3) = G_PTR_ADD %0, %1
@ -82,8 +82,8 @@ body: |
; CHECK-LABEL: name: test_gep_private_i32_idx
; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[GEP]](p5)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[PTR_ADD]](p5)
%0:_(p5) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p5) = G_PTR_ADD %0, %1
@ -100,8 +100,8 @@ body: |
; CHECK-LABEL: name: test_gep_constant32_i32_idx
; CHECK: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK: [[GEP:%[0-9]+]]:_(p6) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $sgpr0 = COPY [[GEP]](p6)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p6) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $sgpr0 = COPY [[PTR_ADD]](p6)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(p6) = G_PTR_ADD %0, %1
@ -118,11 +118,28 @@ body: |
; CHECK-LABEL: name: test_gep_region_i32_idx
; CHECK: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[GEP:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[GEP]](p2)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[PTR_ADD]](p2)
%0:_(p2) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p2) = G_PTR_ADD %0, %1
$vgpr0 = COPY %2
...
---
name: test_gep_p999_i64_idx
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CHECK-LABEL: name: test_gep_p999_i64_idx
; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p999) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p999)
%0:_(p999) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(p999) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...