R600/SI: Change how DS offsets are printed

Match SC by using offset/offset0/offset1 and printing
in decimal.

llvm-svn: 219537
This commit is contained in:
Matt Arsenault 2014-10-10 22:16:07 +00:00
parent fe0a2e677b
commit 61cc9083d0
19 changed files with 274 additions and 225 deletions

View File

@ -42,6 +42,16 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
}
void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
}
void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())
@ -68,6 +78,27 @@ void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
}
}
void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << " offset0:";
printU8ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << " offset1:";
printU8ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())

View File

@ -34,11 +34,16 @@ public:
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);

View File

@ -201,6 +201,15 @@ def addr64 : Operand<i1> {
def mbuf_offset : Operand<i16> {
let PrintMethod = "printMBUFOffset";
}
def ds_offset : Operand<i16> {
let PrintMethod = "printDSOffset";
}
def ds_offset0 : Operand<i8> {
let PrintMethod = "printDSOffset0";
}
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
}
def glc : Operand <i1> {
let PrintMethod = "printGLC";
}
@ -926,8 +935,8 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs regClass:$vdst),
(ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
asm#" $vdst, $addr, $offset, [M0]",
(ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
asm#" $vdst, $addr"#"$offset"#" [M0]",
[]> {
let data0 = 0;
let data1 = 0;
@ -938,8 +947,8 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
(ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1),
asm#" $vdst, $addr, $offset0, $offset1, [M0]",
(ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
[]> {
let data0 = 0;
let data1 = 0;
@ -950,8 +959,8 @@ class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset),
asm#" $addr, $data0, $offset [M0]",
(ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]> {
let data1 = 0;
let mayStore = 1;
@ -963,8 +972,8 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
u8imm:$offset0, u8imm:$offset1),
asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]",
ds_offset0:$offset0, ds_offset1:$offset1),
asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
[]> {
let mayStore = 1;
let mayLoad = 0;
@ -975,8 +984,8 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
asm#" $vdst, $addr, $data0, $offset, [M0]", []>,
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
AtomicNoRet<noRetOp, 1> {
let data1 = 0;
@ -990,8 +999,8 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""
class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 1> {
let mayStore = 1;
@ -1004,8 +1013,8 @@ class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""
class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
asm#" $addr, $data0, $data1, $offset, [M0]",
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {
let mayStore = 1;
@ -1016,8 +1025,8 @@ class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
asm#" $addr, $data0, $offset, [M0]",
(ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {

View File

@ -10,8 +10,8 @@
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0xc
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x14
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]] offset:12
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0

View File

@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@ -7,7 +7,7 @@
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: S_ENDPGM
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -26,7 +26,7 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
@ -37,9 +37,9 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset:
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: S_ENDPGM
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
@ -57,7 +57,7 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: S_ENDPGM
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -75,7 +75,7 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: S_ENDPGM
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4

View File

@ -11,7 +11,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
; R600: LDS_ADD *
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
@ -29,7 +29,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
; R600: LDS_ADD_RET *
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst

View File

@ -11,7 +11,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
; R600: LDS_SUB *
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
@ -29,7 +29,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
; R600: LDS_SUB_RET *
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst

View File

@ -8,19 +8,19 @@ declare void @llvm.AMDGPU.barrier.local() #1
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
; CHECK: BB0_1:
; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]]
; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]]
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]], 0x0, 0x1
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]], 0x20, 0x21
; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: S_ENDPGM
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
entry:

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; FIXME: We don't get cases where the address was an SGPR because we
; get a copy to the address register for each one.
@ -7,7 +7,7 @@
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
; SI-LABEL: @simple_read2_f32
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x8
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_max_offset
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0xff
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
@ -46,8 +46,8 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
; SI-LABEL: @simple_read2_f32_too_far
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x404
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -63,8 +63,8 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_x2
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x0, 0x8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -94,9 +94,9 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x0, 0x8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: S_BARRIER
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -130,8 +130,8 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
; element results in only folding the inner pair.
; SI-LABEL: @simple_read2_f32_x2_nonzero_base
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]], 0x2, 0x8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]], 0xb, 0x1b
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -242,8 +242,8 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
; SI-LABEL: @simple_read2_f32_volatile_0
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x20
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -260,8 +260,8 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
; SI-LABEL: @simple_read2_f32_volatile_1
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x20
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
; SI-LABEL: @simple_read2_f64
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]], 0x0, 0x8
; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f64_max_offset
; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0, 0xff
; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_ENDPGM
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -348,8 +348,8 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
; SI-LABEL: @simple_read2_f64_too_far
; SI-NOT DS_READ2_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x808
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
; SI: S_ENDPGM
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -366,8 +366,8 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x0, 0x1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0xe, 0xf
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: S_ENDPGM
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1

View File

@ -5,7 +5,7 @@
; SI-LABEL: @simple_read2st64_f32_0_1
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x1
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
@ -24,7 +24,7 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f32_1_2
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0x2
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
@ -44,7 +44,7 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
}
; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0xff
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
@ -65,9 +65,9 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: DS_READ2ST64_B32
; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x100,
; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]], 0x0
; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]]
; SI: S_ENDPGM
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_0_1
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x0, 0x1
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
@ -136,7 +136,7 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_1_2
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x1, 0x2
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
@ -158,8 +158,8 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x0, 0x1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x80, 0x81
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: S_ENDPGM
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -176,7 +176,7 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac
; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
; SI-LABEL: @simple_read2st64_f64_max_offset
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}}, 0x4, 0x7f
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
@ -197,9 +197,9 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: DS_READ2ST64_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 0x200,
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]], 0x0
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: S_ENDPGM
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: DS_READ2ST_B64
; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 0x0, 0x8
; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_ENDPGM
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -43,8 +43,8 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
; SI-LABEL: @simple_write2_two_val_f32_volatile_0
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x0
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x20
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -62,8 +62,8 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
; SI-LABEL: @simple_write2_two_val_f32_volatile_1
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x0
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}, 0x20
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0xff [M0]
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -161,8 +161,8 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
}
; SI-LABEL: @simple_write2_two_val_too_far_f32
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x404
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -179,8 +179,8 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2_two_val_f32_x2
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]], 0x0, 0x8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]], 0xb, 0x1b
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -209,8 +209,8 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
}
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]], 0x3, 0x8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]], 0xb, 0x1b
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
; SI-LABEL: @simple_write2_one_val_f64
; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -285,8 +285,8 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0x0, 0x1 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]], 0xe, 0xf [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
; SI: S_ENDPGM
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0x8 [M0]
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1

View File

@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]], 0x0, 0x1 [M0]
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
; SI: S_ENDPGM
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -25,7 +25,7 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x2, 0x5 [M0]
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
; SI: S_ENDPGM
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]], 0x0, 0xff [M0]
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -66,7 +66,7 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_ADD_I32_e32 [[VPTR:v[0-9]+]],
; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]], 0x4, 0x7f [M0]
; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
; SI: S_ENDPGM
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: DS_WRITE2ST64_B64
; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0, 0x8
; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
; SI: S_ENDPGM
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1

View File

@ -4,7 +4,7 @@
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}} offset:64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void

View File

@ -1,8 +1,8 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
; BOTH-LABEL: {{^}}local_i32_load:
; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
; BOTH-LABEL: {{^}}local_i32_load
; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
; BOTH: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32 addrspace(3)* %in, i32 7
@ -11,8 +11,8 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
ret void
}
; BOTH-LABEL: {{^}}local_i32_load_0_offset:
; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
; BOTH-LABEL: {{^}}local_i32_load_0_offset
; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
; BOTH: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32 addrspace(3)* %in, align 4
@ -22,7 +22,7 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %
; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
; BOTH-NOT: ADD
; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
; BOTH: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65535
@ -37,7 +37,7 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; SI: S_OR_B32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; CI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; BOTH: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
; BOTH: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65536
@ -48,7 +48,7 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
; BOTH-LABEL: {{^}}local_i64_load:
; BOTH-NOT: ADD
; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64 addrspace(3)* %in, i32 7
@ -57,8 +57,8 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
ret void
}
; BOTH-LABEL: {{^}}local_i64_load_0_offset:
; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; BOTH-LABEL: {{^}}local_i64_load_0_offset
; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64 addrspace(3)* %in, align 8
@ -68,7 +68,7 @@ define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %
; BOTH-LABEL: {{^}}local_f64_load:
; BOTH-NOT: ADD
; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double addrspace(3)* %in, i32 7
@ -77,8 +77,8 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
ret void
}
; BOTH-LABEL: {{^}}local_f64_load_0_offset:
; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; BOTH-LABEL: {{^}}local_f64_load_0_offset
; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double addrspace(3)* %in, align 8
@ -88,7 +88,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace
; BOTH-LABEL: {{^}}local_i64_store:
; BOTH-NOT: ADD
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
@ -97,7 +97,7 @@ define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_i64_store_0_offset:
; BOTH-NOT: ADD
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
@ -105,15 +105,15 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_f64_store:
; BOTH-NOT: ADD
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
ret void
}
; BOTH-LABEL: {{^}}local_f64_store_0_offset:
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
; BOTH-LABEL: {{^}}local_f64_store_0_offset
; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
@ -121,8 +121,9 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v2i64_store:
; BOTH-NOT: ADD
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
; BOTH: S_ENDPGM
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
@ -131,8 +132,9 @@ define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
; BOTH-NOT: ADD
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
; BOTH: S_ENDPGM
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
@ -140,10 +142,11 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v4i64_store:
; BOTH-NOT: ADD
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
; BOTH: S_ENDPGM
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
@ -152,10 +155,11 @@ define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
; BOTH-NOT: ADD
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
; BOTH: S_ENDPGM
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void

View File

@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
@ -7,7 +7,7 @@
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
@ -18,7 +18,7 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; EG: LDS_WRXCHG_RET *
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -33,7 +33,7 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
@ -44,7 +44,7 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; EG: LDS_ADD_RET *
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -55,8 +55,8 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
@ -71,7 +71,7 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
; EG: LDS_ADD_RET *
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
@ -83,7 +83,7 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; EG: LDS_ADD_RET *
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -94,8 +94,8 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
@ -118,7 +118,7 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -131,7 +131,7 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; EG: LDS_SUB_RET *
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
@ -143,7 +143,7 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; EG: LDS_SUB_RET *
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -164,7 +164,7 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; EG: LDS_AND_RET *
; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -185,7 +185,7 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; EG: LDS_OR_RET *
; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -206,7 +206,7 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; EG: LDS_XOR_RET *
; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -235,7 +235,7 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; EG: LDS_MIN_INT_RET *
; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -256,7 +256,7 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; EG: LDS_MAX_INT_RET *
; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -277,7 +277,7 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; EG: LDS_MIN_UINT_RET *
; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -298,7 +298,7 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; EG: LDS_MAX_UINT_RET *
; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -311,7 +311,7 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: S_ENDPGM
define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
@ -319,7 +319,7 @@ define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -332,7 +332,7 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_ADD_U32 [[VPTR]], [[DATA]], 0x0, [M0]
; SI: DS_ADD_U32 [[VPTR]], [[DATA]] [M0]
; SI: S_ENDPGM
define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
@ -340,7 +340,7 @@ define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -348,9 +348,9 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: S_ENDPGM
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
@ -363,7 +363,7 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: S_ENDPGM
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
@ -373,7 +373,7 @@ define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: S_ENDPGM
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -382,8 +382,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
; SI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}
; CI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
@ -402,7 +402,7 @@ define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -413,7 +413,7 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]]
; SI: S_ENDPGM
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
@ -423,7 +423,7 @@ define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: S_ENDPGM
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -440,7 +440,7 @@ define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; SI: DS_AND_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_AND_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -457,7 +457,7 @@ define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; SI: DS_OR_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_OR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -474,7 +474,7 @@ define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; SI: DS_XOR_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_XOR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -498,7 +498,7 @@ define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; SI: DS_MIN_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MIN_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -515,7 +515,7 @@ define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; SI: DS_MAX_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MAX_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -532,7 +532,7 @@ define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; SI: DS_MIN_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MIN_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
@ -549,7 +549,7 @@ define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; SI: DS_MAX_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: DS_MAX_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: S_ENDPGM
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SI: DS_WRXCHG_RTN_B64
@ -10,7 +10,7 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -34,7 +34,7 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
@ -48,7 +48,7 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
@ -58,7 +58,7 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; SI: DS_INC_RTN_U64 {{.*}} 0x20
; SI: DS_INC_RTN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -77,7 +77,7 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SI: DS_SUB_RTN_U64 {{.*}} 0x20
; SI: DS_SUB_RTN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -90,7 +90,7 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
@ -100,7 +100,7 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; SI: DS_DEC_RTN_U64 {{.*}} 0x20
; SI: DS_DEC_RTN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -119,7 +119,7 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SI: DS_AND_RTN_B64 {{.*}} 0x20
; SI: DS_AND_RTN_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -138,7 +138,7 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SI: DS_OR_RTN_B64 {{.*}} 0x20
; SI: DS_OR_RTN_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -157,7 +157,7 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SI: DS_XOR_RTN_B64 {{.*}} 0x20
; SI: DS_XOR_RTN_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -184,7 +184,7 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SI: DS_MIN_RTN_I64 {{.*}} 0x20
; SI: DS_MIN_RTN_I64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -203,7 +203,7 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SI: DS_MAX_RTN_I64 {{.*}} 0x20
; SI: DS_MAX_RTN_I64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -222,7 +222,7 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SI: DS_MIN_RTN_U64 {{.*}} 0x20
; SI: DS_MIN_RTN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -241,7 +241,7 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SI: DS_MAX_RTN_U64 {{.*}} 0x20
; SI: DS_MAX_RTN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -259,7 +259,7 @@ define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -281,7 +281,7 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI: DS_ADD_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
; SI: DS_ADD_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; SI: S_ENDPGM
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
@ -293,7 +293,7 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI: DS_INC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
; SI: DS_INC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: S_ENDPGM
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
@ -301,7 +301,7 @@ define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; SI: DS_INC_U64 {{.*}} 0x20
; SI: DS_INC_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -318,7 +318,7 @@ define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SI: DS_SUB_U64 {{.*}} 0x20
; SI: DS_SUB_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -330,7 +330,7 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
; SI: DS_DEC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
; SI: DS_DEC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: S_ENDPGM
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
@ -338,7 +338,7 @@ define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; SI: DS_DEC_U64 {{.*}} 0x20
; SI: DS_DEC_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -355,7 +355,7 @@ define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SI: DS_AND_B64 {{.*}} 0x20
; SI: DS_AND_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -372,7 +372,7 @@ define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SI: DS_OR_B64 {{.*}} 0x20
; SI: DS_OR_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -389,7 +389,7 @@ define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SI: DS_XOR_B64 {{.*}} 0x20
; SI: DS_XOR_B64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -413,7 +413,7 @@ define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SI: DS_MIN_I64 {{.*}} 0x20
; SI: DS_MIN_I64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -430,7 +430,7 @@ define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SI: DS_MAX_I64 {{.*}} 0x20
; SI: DS_MAX_I64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -447,7 +447,7 @@ define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SI: DS_MIN_U64 {{.*}} 0x20
; SI: DS_MIN_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
@ -464,7 +464,7 @@ define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SI: DS_MAX_U64 {{.*}} 0x20
; SI: DS_MAX_U64 {{.*}} offset:32
; SI: S_ENDPGM
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4

View File

@ -30,9 +30,9 @@
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI: V_ADD_I32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]], 0x0
; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]] [M0]
; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]] [M0]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:

View File

@ -16,7 +16,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI-LABEL: {{^}}load_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_READ_B32 {{v[0-9]+}}, [[PTR]], 0x8, [M0]
; SI: DS_READ_B32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
; SI: S_ENDPGM
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -33,7 +33,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_READ_B32 [[RESULT:v[0-9]+]], [[PTR]], 0x8, [M0]
; SI: DS_READ_B32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
; SI: V_ADD_I32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
; SI-DAG: BUFFER_STORE_DWORD [[RESULT]]
; SI-DAG: BUFFER_STORE_DWORD [[ADDUSE]]
@ -51,8 +51,8 @@ define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %ad
@maxlds = addrspace(3) global [65536 x i8] zeroinitializer, align 4
; SI-LABEL: {{^}}load_shl_base_lds_max_offset:
; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}}, 0xffff
; SI-LABEL: {{^}}load_shl_base_lds_max_offset
; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; SI: S_ENDPGM
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -69,7 +69,7 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; SI-LABEL: {{^}}load_shl_base_lds_2:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI-NEXT: DS_READ2ST64_B32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]], 0x1, 0x9, [M0]
; SI-NEXT: DS_READ2ST64_B32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
; SI: S_ENDPGM
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -85,7 +85,7 @@ define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
; SI-LABEL: {{^}}store_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_WRITE_B32 [[PTR]], {{v[0-9]+}}, 0x8 [M0]
; SI: DS_WRITE_B32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
; SI: S_ENDPGM
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -115,7 +115,7 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_CMPST_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}}, 0x8
; SI: DS_CMPST_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -130,7 +130,7 @@ define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace
; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_WRXCHG_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_WRXCHG_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -144,7 +144,7 @@ define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_ADD_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_ADD_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -158,7 +158,7 @@ define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_SUB_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_SUB_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -172,7 +172,7 @@ define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_AND_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_AND_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -186,7 +186,7 @@ define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_OR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_OR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -200,7 +200,7 @@ define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_XOR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_XOR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -224,7 +224,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_MIN_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_MIN_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -238,7 +238,7 @@ define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_MAX_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_MAX_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -252,7 +252,7 @@ define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_MIN_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_MIN_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
@ -266,7 +266,7 @@ define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: DS_MAX_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, 0x8
; SI: DS_MAX_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: S_ENDPGM
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1

View File

@ -41,8 +41,8 @@ define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspac
ret void
}
; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset:
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x8, 0x9
; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
; SI: S_ENDPGM
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = getelementptr i64 addrspace(3)* %in, i32 4
@ -53,7 +53,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x0, 0x1
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
; SI: S_ENDPGM
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
@ -79,8 +79,8 @@ define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
ret void
}
; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset:
; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x8, 0x9
; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: S_ENDPGM
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
%ptr = getelementptr i64 addrspace(3)* %out, i32 4
@ -90,7 +90,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}, 0x0, 0x1
; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_ENDPGM
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*