diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index f0f38c193fff..64c54ccb31ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1029,6 +1029,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &SLC, SDValue &TFE) const { SDValue Ptr, Offen, Idxen, Addr64; + // addr64 bit was removed for volcanic islands. + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return false; + SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, GLC, SLC, TFE); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8ae687c5e827..099b0b15942b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -254,6 +254,12 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl &, return false; } +bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); +} + bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { @@ -263,8 +269,21 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, switch (AS) { case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // Assume the we will use FLAT for all global memory accesses + // on VI. + // FIXME: This assumption is currently wrong. On VI we still use + // MUBUF instructions for the r + i addressing mode. As currently + // implemented, the MUBUF instructions only work on buffer < 4GB. + // It may be possible to support > 4GB buffers with MUBUF instructions, + // by setting the stride value in the resource descriptor which would + // increase the size limit to (stride * 4GB). However, this is risky, + // because it has never been validated. + return isLegalFlatAddressingMode(AM); + } + // fall-through case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and // additionally can do r + r + i with addr64. 32-bit has more addressing @@ -324,11 +343,9 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; } - case AMDGPUAS::FLAT_ADDRESS: { - // Flat instructions do not have offsets, and only have the register - // address. - return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); - } + case AMDGPUAS::FLAT_ADDRESS: + return isLegalFlatAddressingMode(AM); + default: llvm_unreachable("unhandled address space"); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 635b4edc89de..d84c32ec0092 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -56,6 +56,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; + bool isLegalFlatAddressingMode(const AddrMode &AM) const; public: SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI); diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index 5bf86e649ce0..aca46732adb9 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -103,4 +103,46 @@ def : Pat < (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) >; +// Patterns for global loads with no offset +class FlatLoadPat : Pat < + (vt (node i64:$addr)), + (inst $addr, 0, 0, 0) +>; + +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; + +class FlatStorePat : Pat < + (node vt:$data, i64:$addr), + (inst $data, $addr, 0, 0, 0) +>; + +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; + +class FlatAtomicPat : Pat < + (vt (node i64:$addr, vt:$data)), + (inst $addr, $data, 0, 0) +>; + +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; + + } // End Predicates = [isVI] diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 77f7bd01b7f0..a68d110fdc96 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -1,12 +1,15 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.r600.read.tidig.x() #0 ; OPT-LABEL: @test_sink_global_small_offset_i32( -; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in +; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in +; OPT-VI: getelementptr i32, i32 addrspace(1)* %in ; OPT: br i1 -; OPT: ptrtoint +; OPT-CI: ptrtoint ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32: ; GCN: {{^}}BB0_2: @@ -214,8 +217,11 @@ done: } ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32: +; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0 +; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0 ; GCN: s_and_saveexec_b64 -; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GCN: {{^}}BB7_2: define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 847950f6376e..146f0a5fbf26 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -1,7 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=FUNC %s + ; FUNC-LABEL: {{^}}atomic_add_i32_offset: -; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -10,8 +12,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -22,6 +24,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -32,7 +38,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -43,7 +52,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32: -; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -51,8 +60,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret: -; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -62,6 +71,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_addr64: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -71,7 +83,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -81,7 +96,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_offset: -; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -90,8 +105,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -102,6 +117,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -112,7 +130,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -123,7 +144,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32: -; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -131,8 +152,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret: -; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -142,6 +163,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -151,7 +175,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -161,7 +188,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_offset: -; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -170,8 +197,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -182,6 +209,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -192,7 +222,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -203,7 +236,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32: -; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -211,8 +244,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -222,6 +255,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -231,7 +267,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -241,7 +280,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_offset: -; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -250,8 +289,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -262,6 +301,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -272,7 +314,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -283,7 +328,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32: -; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -291,8 +336,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -302,6 +347,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -311,7 +359,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -321,7 +372,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_offset: -; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -330,8 +381,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -342,6 +393,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -352,7 +406,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -363,7 +420,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32: -; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -371,8 +428,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -382,6 +439,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -391,7 +451,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -401,7 +464,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_offset: -; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -410,8 +473,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -422,6 +485,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -432,7 +498,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -443,7 +512,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32: -; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -451,8 +520,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -462,6 +531,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -471,7 +543,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -481,7 +556,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_offset: -; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -490,8 +565,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -502,6 +577,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -512,7 +590,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -523,7 +604,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32: -; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -532,7 +613,7 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -542,6 +623,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -551,7 +635,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -561,7 +648,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_offset: -; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -570,8 +657,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -582,6 +669,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -592,7 +682,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -603,7 +696,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32: -; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -611,8 +704,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret: -; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -622,6 +715,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_addr64: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -631,7 +727,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -641,7 +740,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: -; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -650,8 +749,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -672,7 +771,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -683,7 +785,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32: -; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -691,8 +793,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -702,6 +804,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -711,7 +816,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -721,7 +829,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_offset: -; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -730,8 +838,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -742,6 +850,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -752,7 +863,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -763,7 +877,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32: -; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst @@ -771,8 +885,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst @@ -782,6 +896,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -791,7 +908,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index