[AMDGPU] Fix typo in regular expression checks. NFC.

This commit is contained in:
Jay Foad 2021-04-06 12:29:48 +01:00
parent b8aba76a4e
commit 6fec0a34ce
10 changed files with 19 additions and 19 deletions

View File

@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}test_loop: ; GCN-LABEL: {{^}}test_loop:
; GCN: s_and_b64 vcc, exec, -1 ; GCN: s_and_b64 vcc, exec, -1
; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}} ; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
; GCN: ds_read_b32 ; GCN: ds_read_b32
; GCN: ds_write_b32 ; GCN: ds_write_b32
; GCN: s_cbranch_vccnz [[LABEL]] ; GCN: s_cbranch_vccnz [[LABEL]]
@ -28,7 +28,7 @@ for.body:
} }
; GCN-LABEL: @loop_const_true ; GCN-LABEL: @loop_const_true
; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
; GCN: ds_read_b32 ; GCN: ds_read_b32
; GCN: ds_write_b32 ; GCN: ds_write_b32
; GCN: s_branch [[LABEL]] ; GCN: s_branch [[LABEL]]

View File

@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: fold-imm-copy # GCN-LABEL: name: fold-imm-copy
# GCN: [[SREG:%[0-9+]]]:sreg_32_xm0 = S_MOV_B32 65535 # GCN: [[SREG:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 65535
# GCN: V_AND_B32_e32 [[SREG]] # GCN: V_AND_B32_e32 [[SREG]]
--- ---

View File

@ -134,7 +134,7 @@ entry:
} }
; GCN-LABEL: {{^}}fptosi_f16_to_i1: ; GCN-LABEL: {{^}}fptosi_f16_to_i1:
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}} ; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}} ; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}} ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}

View File

@ -132,7 +132,7 @@ entry:
} }
; GCN-LABEL: {{^}}fptoui_f16_to_i1: ; GCN-LABEL: {{^}}fptoui_f16_to_i1:
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}} ; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}} ; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} ; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}

View File

@ -46,7 +46,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 ; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32 ; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) { define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) {
@ -64,7 +64,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32 ; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) { define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) {
@ -82,7 +82,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32 ; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) { define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) {
@ -100,7 +100,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32 ; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) { define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) {
@ -118,7 +118,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32 ; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) { define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) {

View File

@ -23,10 +23,10 @@ define amdgpu_kernel void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #
; SI: v_rsq_clamp_f64_e32 ; SI: v_rsq_clamp_f64_e32
; TODO: this constant should be folded: ; TODO: this constant should be folded:
; VI-DAG: s_mov_b32 [[NEG1:s[0-9+]]], -1 ; VI-DAG: s_mov_b32 [[NEG1:s[0-9]+]], -1
; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], [[NEG1]] ; VI-DAG: s_mov_b32 s[[LOW1:[0-9]+]], [[NEG1]]
; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff ; VI-DAG: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff ; VI-DAG: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}} ; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] ; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
; VI-DAG: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW1]]:[[HIGH2]]] ; VI-DAG: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW1]]:[[HIGH2]]]

View File

@ -71,7 +71,7 @@ main_body:
; the offset field. ; the offset field.
; CHECK-LABEL: {{^}}soffset_no_fold: ; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body: main_body:
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0

View File

@ -81,7 +81,7 @@ entry:
; MUBUF: s_add_u32 s32, s32, 0x40000 ; MUBUF: s_add_u32 s32, s32, 0x40000
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill
; MUBUF: s_sub_u32 s32, s32, 0x40000 ; MUBUF: s_sub_u32 s32, s32, 0x40000
; FLATSCR: s_add_u32 [[SOFF:s[0-9+]]], s32, 0x1000 ; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a) call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
@ -100,7 +100,7 @@ entry:
; MUBUF: s_add_u32 s32, s32, 0x40000 ; MUBUF: s_add_u32 s32, s32, 0x40000
; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload ; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload
; MUBUF: s_sub_u32 s32, s32, 0x40000 ; MUBUF: s_sub_u32 s32, s32, 0x40000
; FLATSCR: s_add_u32 [[SOFF:s[0-9+]]], s32, 0x1000 ; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
; Force %a to spill with no free SGPRs ; Force %a to spill with no free SGPRs

View File

@ -159,7 +159,7 @@ exit:
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
; SI: buffer_load_dword ; SI: buffer_load_dword
; SI-DAG: buffer_store_dword ; SI-DAG: buffer_store_dword
; SI-DAG: s_cmpk_lg_i32 s{{[0-9+]}}, 0x100 ; SI-DAG: s_cmpk_lg_i32 s{{[0-9]+}}, 0x100
; SI: s_cbranch_scc1 [[LABEL_LOOP]] ; SI: s_cbranch_scc1 [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]: ; SI: [[LABEL_EXIT]]:
; SI: s_endpgm ; SI: s_endpgm

View File

@ -810,7 +810,7 @@ main_body:
; GCN-LABEL: {{^}}test_wqm2: ; GCN-LABEL: {{^}}test_wqm2:
; GFX1032: s_wqm_b32 exec_lo, exec_lo ; GFX1032: s_wqm_b32 exec_lo, exec_lo
; GFX1032: s_and_b32 exec_lo, exec_lo, s{{[0-9+]}} ; GFX1032: s_and_b32 exec_lo, exec_lo, s{{[0-9]+}}
; GFX1064: s_wqm_b64 exec, exec{{$}} ; GFX1064: s_wqm_b64 exec, exec{{$}}
; GFX1064: s_and_b64 exec, exec, s[{{[0-9:]+}}] ; GFX1064: s_and_b64 exec, exec, s[{{[0-9:]+}}]
define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 { define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {