From 32897c05ab6ba90c3496270046aa5664b37f6557 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 3 Nov 2020 13:31:59 +0000 Subject: [PATCH] [AMDGPU] Specify a triple to avoid codegen changes depending on host OS --- llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll | 12 +- llvm/test/CodeGen/AMDGPU/ds_read2.ll | 186 +++++++++------------- llvm/test/CodeGen/AMDGPU/ds_write2.ll | 85 +++------- 3 files changed, 107 insertions(+), 176 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll index 2a4ede05acd6..b79359c7705f 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -10,7 +10,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 { ; CI-LABEL: write_ds_sub0_offset0_global: ; CI: ; %bb.0: ; %entry ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_sub_i32_e32 v0, vcc, lds.obj@abs32@lo, v0 +; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 ; CI-NEXT: v_mov_b32_e32 v1, 0x7b ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_write_b32 v0, v1 offset:12 @@ -19,7 +19,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 { ; GFX9-LABEL: write_ds_sub0_offset0_global: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_sub_u32_e32 v0, lds.obj@abs32@lo, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX9-NEXT: ds_write_b32 v0, v1 offset:12 ; GFX9-NEXT: s_endpgm @@ -37,7 +37,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_load_dword s0, s[0:1], 0x9 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_sub_i32_e32 v0, vcc, lds.obj@abs32@lo, v0 +; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 ; CI-NEXT: s_mov_b64 vcc, 0 ; CI-NEXT: v_mov_b32_e32 v2, 0x7b ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -57,7 +57,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_sub_u32_e32 v0, lds.obj@abs32@lo, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_div_fmas_f32 v2, v1, v1, v1 diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 01d47662e1d5..15e97e3d471c 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. @@ -13,9 +13,8 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; CI-LABEL: simple_read2_f32: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:8 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -28,8 +27,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; GFX9-LABEL: simple_read2_f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2 -; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 +; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 @@ -51,9 +49,8 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; CI-LABEL: simple_read2_f32_max_offset: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:255 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:255 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -66,8 +63,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; GFX9-LABEL: simple_read2_f32_max_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2 -; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:255 +; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:255 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 @@ -89,15 +85,14 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; CI-LABEL: simple_read2_f32_too_far: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read_b32 v2, v1 -; CI-NEXT: ds_read_b32 v1, v1 offset:1028 +; CI-NEXT: ds_read_b32 v1, v0 +; CI-NEXT: ds_read_b32 v2, v0 offset:1028 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_f32_e32 v2, v2, v1 +; CI-NEXT: v_add_f32_e32 v2, v1, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm @@ -105,12 +100,11 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; GFX9-LABEL: simple_read2_f32_too_far: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0 -; GFX9-NEXT: ds_read_b32 v2, v1 -; GFX9-NEXT: ds_read_b32 v1, v1 offset:1028 +; GFX9-NEXT: ds_read_b32 v1, v0 +; GFX9-NEXT: ds_read_b32 v2, v0 offset:1028 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -129,18 +123,16 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; CI-LABEL: simple_read2_f32_x2: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset1:8 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 +; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_f32_e32 v4, v1, v2 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_f32_e32 v1, v1, v2 -; CI-NEXT: v_add_f32_e32 v2, v4, v1 +; CI-NEXT: v_add_f32_e32 v2, v3, v4 +; CI-NEXT: v_add_f32_e32 v2, v1, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm @@ -148,9 +140,8 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; GFX9-LABEL: simple_read2_f32_x2: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4 -; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 -; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27 +; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8 +; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 @@ -188,19 +179,18 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; CI-LABEL: simple_read2_f32_x2_barrier: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset1:8 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_barrier ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 -; CI-NEXT: v_add_f32_e32 v4, v1, v2 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27 +; CI-NEXT: v_add_f32_e32 v3, v1, v2 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:11 offset1:27 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_f32_e32 v1, v1, v2 -; CI-NEXT: v_add_f32_e32 v2, v4, v1 +; CI-NEXT: v_add_f32_e32 v2, v3, v1 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm @@ -208,11 +198,10 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; GFX9-LABEL: simple_read2_f32_x2_barrier: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4 -; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 +; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_barrier -; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27 +; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -253,18 +242,16 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; CI-LABEL: simple_read2_f32_x2_nonzero_base: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:2 offset1:8 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:2 offset1:8 +; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_f32_e32 v4, v1, v2 -; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_f32_e32 v1, v1, v2 -; CI-NEXT: v_add_f32_e32 v2, v4, v1 +; CI-NEXT: v_add_f32_e32 v2, v3, v4 +; CI-NEXT: v_add_f32_e32 v2, v1, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:8 ; CI-NEXT: s_endpgm @@ -272,9 +259,8 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; GFX9-LABEL: simple_read2_f32_x2_nonzero_base: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4 -; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset0:2 offset1:8 -; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27 +; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset0:2 offset1:8 +; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 @@ -422,9 +408,8 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ; CI-LABEL: read2_ptr_is_subreg_f32: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:8 +; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -437,8 +422,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ; GFX9-LABEL: read2_ptr_is_subreg_f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2 -; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 +; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 @@ -466,15 +450,14 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; CI-LABEL: simple_read2_f32_volatile_0: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read_b32 v2, v1 -; CI-NEXT: ds_read_b32 v1, v1 offset:32 +; CI-NEXT: ds_read_b32 v1, v0 +; CI-NEXT: ds_read_b32 v2, v0 offset:32 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_f32_e32 v2, v2, v1 +; CI-NEXT: v_add_f32_e32 v2, v1, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm @@ -482,12 +465,11 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; GFX9-LABEL: simple_read2_f32_volatile_0: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0 -; GFX9-NEXT: ds_read_b32 v2, v1 -; GFX9-NEXT: ds_read_b32 v1, v1 offset:32 +; GFX9-NEXT: ds_read_b32 v1, v0 +; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -506,15 +488,14 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; CI-LABEL: simple_read2_f32_volatile_1: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read_b32 v2, v1 -; CI-NEXT: ds_read_b32 v1, v1 offset:32 +; CI-NEXT: ds_read_b32 v1, v0 +; CI-NEXT: ds_read_b32 v2, v0 offset:32 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_f32_e32 v2, v2, v1 +; CI-NEXT: v_add_f32_e32 v2, v1, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm @@ -522,12 +503,11 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; GFX9-LABEL: simple_read2_f32_volatile_1: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0 -; GFX9-NEXT: ds_read_b32 v2, v1 -; GFX9-NEXT: ds_read_b32 v1, v1 offset:32 +; GFX9-NEXT: ds_read_b32 v1, v0 +; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -809,9 +789,8 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; CI-LABEL: simple_read2_f64: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b64 v[0:3], v0 offset1:8 +; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -824,8 +803,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; GFX9-LABEL: simple_read2_f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX9-NEXT: v_add_u32_e32 v0, lds.f64@abs32@lo, v4 -; GFX9-NEXT: ds_read2_b64 v[0:3], v0 offset1:8 +; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] @@ -847,9 +825,8 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; CI-LABEL: simple_read2_f64_max_offset: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read2_b64 v[0:3], v0 offset1:255 +; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -862,8 +839,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; GFX9-LABEL: simple_read2_f64_max_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX9-NEXT: v_add_u32_e32 v0, lds.f64@abs32@lo, v4 -; GFX9-NEXT: ds_read2_b64 v[0:3], v0 offset1:255 +; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] @@ -885,10 +861,9 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; CI-LABEL: simple_read2_f64_too_far: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; CI-NEXT: v_add_i32_e32 v3, vcc, lds.f64@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read_b64 v[1:2], v3 -; CI-NEXT: ds_read_b64 v[3:4], v3 offset:2056 +; CI-NEXT: ds_read_b64 v[1:2], v0 +; CI-NEXT: ds_read_b64 v[3:4], v0 offset:2056 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 @@ -901,9 +876,8 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; GFX9-LABEL: simple_read2_f64_too_far: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX9-NEXT: v_add_u32_e32 v2, lds.f64@abs32@lo, v4 -; GFX9-NEXT: ds_read_b64 v[0:1], v2 -; GFX9-NEXT: ds_read_b64 v[2:3], v2 offset:2056 +; GFX9-NEXT: ds_read_b64 v[0:1], v4 +; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:2056 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] @@ -971,7 +945,7 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { ; CI-LABEL: load_constant_adjacent_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -984,7 +958,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ; ; GFX9-LABEL: load_constant_adjacent_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1003,7 +977,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { ; CI-LABEL: load_constant_disjoint_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:2 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1016,7 +990,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out ; ; GFX9-LABEL: load_constant_disjoint_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:2 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1037,7 +1011,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { ; CI-LABEL: load_misaligned64_constant_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v2, bar@abs32@lo +; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v2 offset1:1 ; CI-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 @@ -1052,7 +1026,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; ; GFX9-ALIGNED-LABEL: load_misaligned64_constant_offsets: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, bar@abs32@lo +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-ALIGNED-NEXT: ds_read2_b32 v[0:1], v2 offset1:1 ; GFX9-ALIGNED-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 ; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1066,7 +1040,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; ; GFX9-UNALIGNED-LABEL: load_misaligned64_constant_offsets: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, bar@abs32@lo +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-UNALIGNED-NEXT: ds_read_b128 v[0:3], v0 ; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) @@ -1088,11 +1062,8 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { ; CI-LABEL: load_misaligned64_constant_large_offsets: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s4, bar.large@abs32@lo -; CI-NEXT: s_add_i32 s5, s4, 0x4000 -; CI-NEXT: s_addk_i32 s4, 0x7ff8 -; CI-NEXT: v_mov_b32_e32 v0, s5 -; CI-NEXT: v_mov_b32_e32 v2, s4 +; CI-NEXT: v_mov_b32_e32 v0, 0x4000 +; CI-NEXT: v_mov_b32_e32 v2, 0x7ff8 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; CI-NEXT: ds_read2_b32 v[2:3], v2 offset1:1 @@ -1107,11 +1078,8 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac ; ; GFX9-LABEL: load_misaligned64_constant_large_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s2, bar.large@abs32@lo -; GFX9-NEXT: s_add_i32 s3, s2, 0x4000 -; GFX9-NEXT: s_addk_i32 s2, 0x7ff8 -; GFX9-NEXT: v_mov_b32_e32 v0, s3 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x4000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7ff8 ; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset1:1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1399,26 +1367,25 @@ bb: define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspace(3)* %arg) { ; CI-LABEL: ds_read_call_read: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_getpc_b64 s[40:41] +; CI-NEXT: s_mov_b32 s40, s0 +; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0 ; CI-NEXT: s_load_dwordx2 s[36:37], s[0:1], 0x9 ; CI-NEXT: s_load_dword s0, s[0:1], 0xb -; CI-NEXT: s_mov_b32 s42, -1 -; CI-NEXT: s_mov_b32 s43, 0xe8f000 -; CI-NEXT: s_add_u32 s40, s40, s3 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: s_addc_u32 s41, s41, 0 +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_add_u32 s40, s40, s3 +; CI-NEXT: s_addc_u32 s41, s41, 0 ; CI-NEXT: v_add_i32_e32 v40, vcc, s0, v0 ; CI-NEXT: s_getpc_b64 s[0:1] ; CI-NEXT: s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4 ; CI-NEXT: s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12 -; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: ds_read_b32 v41, v40 ; CI-NEXT: s_mov_b64 s[0:1], s[40:41] ; CI-NEXT: s_mov_b64 s[2:3], s[42:43] -; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_mov_b32 s39, 0xf000 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1431,24 +1398,23 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; ; GFX9-LABEL: ds_read_call_read: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_getpc_b64 s[36:37] +; GFX9-NEXT: s_mov_b32 s36, s0 +; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[0:1], 0x24 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_lshl_add_u32 v40, v0, 2, s2 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: ds_read_b32 v41, v40 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: ds_read_b32 v0, v40 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index ef1369b24350..8346ce90b183 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 @@ -16,7 +16,6 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v1, v1 offset1:8 @@ -28,7 +27,6 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b32 v0, v1, v1 offset1:8 ; GFX9-NEXT: s_endpgm @@ -54,7 +52,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:8 @@ -67,7 +64,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[0:1] offset:4 -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8 ; GFX9-NEXT: s_endpgm @@ -98,7 +94,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: ds_write_b32 v0, v2 @@ -113,7 +108,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[2:3] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: ds_write_b32 v0, v1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -146,7 +140,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: ds_write_b32 v0, v2 @@ -161,7 +154,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[2:3] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: ds_write_b32 v0, v1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -197,7 +189,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa ; CI-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64 offset:8 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v3, v2 offset1:8 @@ -207,13 +198,12 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v3, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[2:3], v3, s[0:1] offset:8 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, lds@abs32@lo -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 ; GFX9-NEXT: ds_write2_b32 v0, v1, v3 offset1:8 ; GFX9-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -242,7 +232,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:8 @@ -252,8 +241,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, lds@abs32@lo -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -283,7 +271,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[1:4], v[1:2], s[0:3], 0 addr64 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v1, v4 offset1:8 @@ -293,11 +280,10 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[1:4], v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, lds@abs32@lo -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 ; GFX9-NEXT: ds_write2_b32 v0, v1, v4 offset1:8 ; GFX9-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -324,7 +310,6 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:255 @@ -337,7 +322,6 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[0:1] offset:4 -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:255 ; GFX9-NEXT: s_endpgm @@ -368,7 +352,6 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: ds_write_b32 v0, v2 @@ -383,7 +366,6 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[2:3] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: ds_write_b32 v0, v1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -416,7 +398,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:8 @@ -430,7 +411,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[2:3] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27 @@ -474,7 +454,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b32 v0, v2, v1 offset0:3 offset1:8 @@ -488,7 +467,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] ; GFX9-NEXT: global_load_dword v2, v0, s[2:3] -; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:8 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27 @@ -588,7 +566,6 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[0:3], 0 addr64 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b64 v0, v[1:2], v[1:2] offset1:8 @@ -600,7 +577,6 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] -; GFX9-NEXT: v_add_u32_e32 v2, lds.f64@abs32@lo, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:8 ; GFX9-NEXT: s_endpgm @@ -754,7 +730,6 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 -; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:8 @@ -767,7 +742,6 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] ; GFX9-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1] offset:8 -; GFX9-NEXT: v_add_u32_e32 v4, lds.f64@abs32@lo, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:8 ; GFX9-NEXT: s_endpgm @@ -790,7 +764,7 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() { ; CI-LABEL: store_constant_adjacent_offsets: ; CI: ; %bb.0: ; CI-NEXT: s_movk_i32 s0, 0x7b -; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, s0 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: s_mov_b32 m0, -1 @@ -800,7 +774,7 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() { ; GFX9-LABEL: store_constant_adjacent_offsets: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_movk_i32 s0, 0x7b -; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 @@ -813,17 +787,17 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() { define amdgpu_kernel void @store_constant_disjoint_offsets() { ; CI-LABEL: store_constant_disjoint_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo -; CI-NEXT: v_mov_b32_e32 v1, 0x7b +; CI-NEXT: v_mov_b32_e32 v0, 0x7b +; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write2_b32 v0, v1, v1 offset1:2 +; CI-NEXT: ds_write2_b32 v1, v0, v0 offset1:2 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: store_constant_disjoint_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo -; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9-NEXT: ds_write2_b32 v0, v1, v1 offset1:2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: ds_write2_b32 v1, v0, v0 offset1:2 ; GFX9-NEXT: s_endpgm store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -835,21 +809,19 @@ define amdgpu_kernel void @store_constant_disjoint_offsets() { define amdgpu_kernel void @store_misaligned64_constant_offsets() { ; CI-LABEL: store_misaligned64_constant_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v0, bar@abs32@lo +; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0x7b -; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 -; CI-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3 +; CI-NEXT: ds_write2_b32 v0, v1, v0 offset1:1 +; CI-NEXT: ds_write2_b32 v0, v1, v0 offset0:2 offset1:3 ; CI-NEXT: s_endpgm ; ; GFX9-ALIGNED-LABEL: store_misaligned64_constant_offsets: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, bar@abs32@lo +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 -; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3 +; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v0 offset1:1 +; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v0 offset0:2 offset1:3 ; GFX9-ALIGNED-NEXT: s_endpgm ; ; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets: @@ -858,8 +830,7 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() { ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, bar@abs32@lo -; GFX9-UNALIGNED-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-UNALIGNED-NEXT: ds_write_b128 v1, v[0:3] ; GFX9-UNALIGNED-NEXT: s_endpgm store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -871,28 +842,22 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() { define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { ; CI-LABEL: store_misaligned64_constant_large_offsets: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s0, bar.large@abs32@lo -; CI-NEXT: s_add_i32 s1, s0, 0x4000 -; CI-NEXT: v_mov_b32_e32 v0, s1 +; CI-NEXT: v_mov_b32_e32 v0, 0x4000 ; CI-NEXT: v_mov_b32_e32 v1, 0x7b ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_addk_i32 s0, 0x7ff8 ; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 -; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v0, 0x7ff8 ; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: store_misaligned64_constant_large_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s0, bar.large@abs32@lo -; GFX9-NEXT: s_add_i32 s1, s0, 0x4000 -; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x4000 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_addk_i32 s0, 0x7ff8 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x7ff8 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 ; GFX9-NEXT: s_endpgm store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4