forked from OSchip/llvm-project
R600/SI: Don't print offset0/offset1 DS operands when they are 0
llvm-svn: 234379
This commit is contained in:
parent
0e6a264673
commit
1f3416a63d
|
@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
|
|||
|
||||
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << " offset0:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
if (MI->getOperand(OpNo).getImm()) {
|
||||
O << " offset0:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << " offset1:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
if (MI->getOperand(OpNo).getImm()) {
|
||||
O << " offset1:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
|
||||
|
|
|
@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
|
|||
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
|
||||
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
|
||||
|
||||
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
|
||||
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
|
||||
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
|
||||
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
|
||||
; CHECK: s_endpgm
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
|
||||
|
||||
; SI-LABEL: @simple_read2_f32
|
||||
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
|
||||
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
|
@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
|
|||
}
|
||||
|
||||
; SI-LABEL: @simple_read2_f32_max_offset
|
||||
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
|
||||
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
|
@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
|
|||
}
|
||||
|
||||
; SI-LABEL: @simple_read2_f32_x2
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
|
||||
|
@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
|
|||
|
||||
; Make sure there is an instruction between the two sets of reads.
|
||||
; SI-LABEL: @simple_read2_f32_x2_barrier
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
|
||||
; SI: s_barrier
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
|
@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
|
|||
|
||||
; SI-LABEL: @simple_read2_f64
|
||||
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
|
||||
; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
|
||||
; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
|
||||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
|
@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
|
|||
}
|
||||
|
||||
; SI-LABEL: @simple_read2_f64_max_offset
|
||||
; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
|
||||
; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
|
|||
|
||||
; Alignment only 4
|
||||
; SI-LABEL: @misaligned_read2_f64
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
|
@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
|
|||
|
||||
; SI-LABEL: @load_constant_adjacent_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
|
||||
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
|
||||
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
|
||||
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
|
||||
|
@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
|
|||
|
||||
; SI-LABEL: @load_constant_disjoint_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
|
||||
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
|
||||
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
|
||||
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
|
||||
|
@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
|
|||
|
||||
; SI-LABEL: @load_misaligned64_constant_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
|
||||
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
|
||||
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
|
||||
|
@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
|
|||
; SI-LABEL: @load_misaligned64_constant_large_offsets
|
||||
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
|
||||
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
|
||||
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
|
||||
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
|
||||
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
|
||||
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
|
||||
; SI-LABEL: @simple_read2st64_f32_0_1
|
||||
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
|
@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
|
|||
}
|
||||
|
||||
; SI-LABEL: @simple_read2st64_f64_0_1
|
||||
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
|
@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
|
|||
; Alignment only
|
||||
|
||||
; SI-LABEL: @misaligned_read2st64_f64
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
|
||||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
|
@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
|
|||
|
||||
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
|
||||
; SI-NOT: ds_read2st_b64
|
||||
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
|
||||
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; SI-LABEL: @simple_write2_one_val_f32
|
||||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
|
|||
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
|
|||
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
|
||||
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
|
||||
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
|
|||
; SI-LABEL: @simple_write2_two_val_subreg2_f32
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
|
|||
; SI-LABEL: @simple_write2_two_val_subreg4_f32
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
|
|||
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
|
||||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
|
|||
}
|
||||
|
||||
; SI-LABEL: @simple_write2_two_val_f32_x2
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
|
||||
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
|
@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
|
|||
; SI-LABEL: @simple_write2_one_val_f64
|
||||
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
|
||||
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
|
||||
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
|
|||
; SI-LABEL: @misaligned_simple_write2_one_val_f64
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
|
||||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
|
||||
|
@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
|
|||
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
|
||||
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
|
||||
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
|
|||
|
||||
; SI-LABEL: @store_constant_adjacent_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
define void @store_constant_adjacent_offsets() {
|
||||
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
|
||||
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
|
||||
|
@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() {
|
|||
; SI-LABEL: @store_constant_disjoint_offsets
|
||||
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
|
||||
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
|
||||
define void @store_constant_disjoint_offsets() {
|
||||
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
|
||||
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
|
||||
|
@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() {
|
|||
|
||||
; SI-LABEL: @store_misaligned64_constant_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
|
||||
define void @store_misaligned64_constant_offsets() {
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
|
||||
|
@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() {
|
|||
; SI-LABEL: @store_misaligned64_constant_large_offsets
|
||||
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
|
||||
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @store_misaligned64_constant_large_offsets() {
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
|
||||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
|
||||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
|
|||
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
|
||||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
|
||||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
|
|||
|
||||
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
|
||||
; SI-NOT: ds_write2st64_b64
|
||||
; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
|
||||
; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
|
|
@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
|
|||
|
||||
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
|
||||
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
|
||||
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
|
||||
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
|
||||
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
|
||||
|
@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
|
|||
|
||||
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
|
||||
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
|
||||
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
|
||||
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
|
||||
|
|
Loading…
Reference in New Issue