forked from OSchip/llvm-project
[AMDGPU] Fix typo in regular expression checks. NFC.
This commit is contained in:
parent
b8aba76a4e
commit
6fec0a34ce
|
@@ -3,7 +3,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}test_loop:
|
||||
; GCN: s_and_b64 vcc, exec, -1
|
||||
; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}}
|
||||
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
|
||||
; GCN: ds_read_b32
|
||||
; GCN: ds_write_b32
|
||||
; GCN: s_cbranch_vccnz [[LABEL]]
|
||||
|
@@ -28,7 +28,7 @@ for.body:
|
|||
}
|
||||
|
||||
; GCN-LABEL: @loop_const_true
|
||||
; GCN: [[LABEL:BB[0-9+]_[0-9]+]]:
|
||||
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: ds_read_b32
|
||||
; GCN: ds_write_b32
|
||||
; GCN: s_branch [[LABEL]]
|
||||
|
|
|
@@ -1,7 +1,7 @@
|
|||
# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: fold-imm-copy
|
||||
# GCN: [[SREG:%[0-9+]]]:sreg_32_xm0 = S_MOV_B32 65535
|
||||
# GCN: [[SREG:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 65535
|
||||
# GCN: V_AND_B32_e32 [[SREG]]
|
||||
|
||||
---
|
||||
|
|
|
@@ -134,7 +134,7 @@ entry:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fptosi_f16_to_i1:
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}}
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
|
||||
|
|
|
@@ -132,7 +132,7 @@ entry:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fptoui_f16_to_i1:
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}}
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
|
||||
|
|
|
@@ -46,7 +46,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
|||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) {
|
||||
|
@@ -64,7 +64,7 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) {
|
||||
|
@@ -82,7 +82,7 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) {
|
||||
|
@@ -100,7 +100,7 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) {
|
||||
|
@@ -118,7 +118,7 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) {
|
||||
|
|
|
@@ -23,10 +23,10 @@ define amdgpu_kernel void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #
|
|||
; SI: v_rsq_clamp_f64_e32
|
||||
|
||||
; TODO: this constant should be folded:
|
||||
; VI-DAG: s_mov_b32 [[NEG1:s[0-9+]]], -1
|
||||
; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], [[NEG1]]
|
||||
; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
|
||||
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
|
||||
; VI-DAG: s_mov_b32 [[NEG1:s[0-9]+]], -1
|
||||
; VI-DAG: s_mov_b32 s[[LOW1:[0-9]+]], [[NEG1]]
|
||||
; VI-DAG: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
|
||||
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
|
||||
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
|
||||
; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI-DAG: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW1]]:[[HIGH2]]]
|
||||
|
|
|
@@ -71,7 +71,7 @@ main_body:
|
|||
; the offset field.
|
||||
; CHECK-LABEL: {{^}}soffset_no_fold:
|
||||
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
|
||||
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
|
||||
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
|
||||
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
main_body:
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
|
||||
|
|
|
@@ -81,7 +81,7 @@ entry:
|
|||
; MUBUF: s_add_u32 s32, s32, 0x40000
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x40000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9+]]], s32, 0x1000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
|
||||
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
|
||||
|
||||
|
@@ -100,7 +100,7 @@ entry:
|
|||
; MUBUF: s_add_u32 s32, s32, 0x40000
|
||||
; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x40000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9+]]], s32, 0x1000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
|
||||
|
||||
; Force %a to spill with no free SGPRs
|
||||
|
|
|
@@ -159,7 +159,7 @@ exit:
|
|||
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: buffer_load_dword
|
||||
; SI-DAG: buffer_store_dword
|
||||
; SI-DAG: s_cmpk_lg_i32 s{{[0-9+]}}, 0x100
|
||||
; SI-DAG: s_cmpk_lg_i32 s{{[0-9]+}}, 0x100
|
||||
; SI: s_cbranch_scc1 [[LABEL_LOOP]]
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
; SI: s_endpgm
|
||||
|
|
|
@@ -810,7 +810,7 @@ main_body:
|
|||
|
||||
; GCN-LABEL: {{^}}test_wqm2:
|
||||
; GFX1032: s_wqm_b32 exec_lo, exec_lo
|
||||
; GFX1032: s_and_b32 exec_lo, exec_lo, s{{[0-9+]}}
|
||||
; GFX1032: s_and_b32 exec_lo, exec_lo, s{{[0-9]+}}
|
||||
; GFX1064: s_wqm_b64 exec, exec{{$}}
|
||||
; GFX1064: s_and_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
|
||||
|
|
Loading…
Reference in New Issue