|
|
|
@ -3,37 +3,25 @@
|
|
|
|
|
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1013 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1013 %s
|
|
|
|
|
; RUN: not --crash llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
|
|
|
|
|
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float3 ray_origin, float3 ray_dir, float3 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float3 ray_origin, half3 ray_dir, half3 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float3 ray_origin, float3 ray_dir, float3 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float3 ray_origin, half3 ray_dir, half3 ray_inv_dir, uint4 texture_descr)
|
|
|
|
|
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
|
|
|
|
|
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
|
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh_intersect_ray:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], [v0, v1, v2, v3, v4, v6, v7, v8, v10, v11, v12], s[0:3]
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh_intersect_ray:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v5, v6
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v6, v7
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v7, v8
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v8, v10
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
|
|
|
|
|
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GCN-LABEL: image_bvh_intersect_ray:
|
|
|
|
|
; GCN: ; %bb.0:
|
|
|
|
|
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
@ -44,60 +32,48 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_flat(i32 %node_ptr, float
|
|
|
|
|
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x float> undef, float %ray_inv_dir_x, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GCN-LABEL: image_bvh_intersect_ray_a16:
|
|
|
|
|
; GCN: ; %bb.0:
|
|
|
|
|
; GCN-NEXT: s_mov_b32 s4, 0xffff
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v6
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v10, s4, v8
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v8
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v9, s4, v9
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v9, 16, v5
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v10, s4, v7
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v8, s4, v8
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v5, v6, s4, v5
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v6, v7, s4, v10
|
|
|
|
|
; GCN-NEXT: v_lshl_or_b32 v7, v9, 16, v8
|
|
|
|
|
; GCN-NEXT: v_lshl_or_b32 v7, v8, 16, v7
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v5, v5, s4, v9
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v6, v6, s4, v10
|
|
|
|
|
; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[0:3] a16
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh64_intersect_ray:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], [v0, v1, v2, v3, v4, v5, v7, v8, v9, v11, v12, v13], s[0:3]
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh64_intersect_ray:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v6, v7
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v7, v8
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v8, v9
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v11, v13
|
|
|
|
|
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GCN-LABEL: image_bvh64_intersect_ray:
|
|
|
|
|
; GCN: ; %bb.0:
|
|
|
|
|
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
@ -109,67 +85,70 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_flat(<2 x i32> %node_ptr
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
%node_ptr = bitcast <2 x i32> %node_ptr_vec to i64
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float %ray_origin_x, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float %ray_origin_y, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float %ray_origin_z, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x float> undef, float %ray_dir_x, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float %ray_dir_y, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x float> %ray_dir1, float %ray_dir_z, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float %ray_origin_x, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x float> undef, float %ray_dir_x, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x float> undef, float %ray_inv_dir_x, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float %ray_inv_dir_y, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float %ray_inv_dir_z, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> inreg %tdescr) {
|
|
|
|
|
; GCN-LABEL: image_bvh64_intersect_ray_a16:
|
|
|
|
|
; GCN: ; %bb.0:
|
|
|
|
|
; GCN-NEXT: s_mov_b32 s4, 0xffff
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v7
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v11, s4, v9
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v9, 16, v9
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v10, s4, v10
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v10, 16, v6
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v11, s4, v8
|
|
|
|
|
; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v8
|
|
|
|
|
; GCN-NEXT: v_and_b32_e32 v9, s4, v9
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
|
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v11, 16, v11
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v6, v7, s4, v6
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v7, v8, s4, v11
|
|
|
|
|
; GCN-NEXT: v_lshl_or_b32 v8, v10, 16, v9
|
|
|
|
|
; GCN-NEXT: v_lshl_or_b32 v8, v9, 16, v8
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v6, v6, s4, v10
|
|
|
|
|
; GCN-NEXT: v_and_or_b32 v7, v7, s4, v11
|
|
|
|
|
; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
|
|
|
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v18, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v1
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v20, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v21, v3
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v22, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v23, v6
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v24, v7
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v25, v8
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v26, v10
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v27, v11
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v28, v12
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v15, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v16, v1
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v17, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v18, v3
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v20, v5
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v21, v6
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v22, v7
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v23, v8
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v24, v9
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v25, v10
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v14
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v15
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v16
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v17
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v14
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:33], s[4:7]
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15
|
|
|
|
|
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7]
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr17
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr18
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
@ -178,10 +157,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr23
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr24
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr25
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr26
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr27
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr28
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB6_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
@ -191,28 +167,24 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v5, v6
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v6, v7
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v7, v8
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v8, v10
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v18, v14
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v16, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v17, v12
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v18, v13
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v14
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v18
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v19
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v16
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v17
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
|
|
|
|
|
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7]
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr18_vgpr19
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
|
|
|
|
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1013-NEXT: s_cbranch_execnz .LBB6_1
|
|
|
|
@ -224,15 +196,186 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v13, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v14, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5
|
|
|
|
|
; GFX1030-NEXT: v_and_b32_e32 v1, s0, v7
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v15, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v16, v3
|
|
|
|
|
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v7
|
|
|
|
|
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
|
|
|
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
|
|
|
; GFX1030-NEXT: v_and_b32_e32 v3, s0, v8
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v17, v4
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v18, v5, s0, v0
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v19, v6, s0, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshl_or_b32 v20, v3, 16, v2
|
|
|
|
|
; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v9
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v10
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v11
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v12
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr13
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr14
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr17
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr18
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB7_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v14, s0, v7
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v8, s0, v8
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
|
|
|
; GFX1013-NEXT: v_lshl_or_b32 v7, v8, 16, v7
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v5, v5, s0, v13
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v6, v6, s0, v14
|
|
|
|
|
; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v9
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v10
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v11
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v12
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
|
|
|
|
|
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
|
|
|
|
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1013-NEXT: s_cbranch_execnz .LBB7_1
|
|
|
|
|
; GFX1013-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v0, v13
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v1, v14
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v15
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v16
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v16, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v17, v1
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v18, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v3
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v20, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v21, v5
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v22, v6
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v23, v7
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v24, v8
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v25, v9
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v26, v10
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v27, v11
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v12
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v13
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v14
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v15
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7]
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr17
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr18
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr21
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr22
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr23
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr24
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr25
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr26
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr27
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB8_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v16, v12
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v17, v13
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v18, v14
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
|
|
|
|
|
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7]
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
|
|
|
|
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1013-NEXT: s_cbranch_execnz .LBB8_1
|
|
|
|
|
; GFX1013-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v0, v20
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v1, v21
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v14, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v15, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6
|
|
|
|
@ -244,11 +387,12 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|
|
|
|
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
|
|
|
; GFX1030-NEXT: v_and_b32_e32 v3, s0, v9
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v18, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v5
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v20, v6, s0, v0
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v21, v7, s0, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshl_or_b32 v22, v3, 16, v2
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v19, v6, s0, v0
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v20, v7, s0, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshl_or_b32 v21, v3, 16, v2
|
|
|
|
|
; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v10
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v11
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v12
|
|
|
|
@ -257,7 +401,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[14:21], s[4:7] a16
|
|
|
|
|
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr14
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15
|
|
|
|
@ -267,185 +411,9 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr21
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr22
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB7_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v5, 16, v6
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v14, s0, v8
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v8, 16, v8
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v9, s0, v9
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v5, v6, s0, v5
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v6, v7, s0, v14
|
|
|
|
|
; GFX1013-NEXT: v_lshl_or_b32 v7, v9, 16, v8
|
|
|
|
|
; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v10
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v11
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v13
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
|
|
|
|
|
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1013-NEXT: image_bvh_intersect_ray v[14:17], v[0:7], s[4:7] a16
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
|
|
|
|
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1013-NEXT: s_cbranch_execnz .LBB7_1
|
|
|
|
|
; GFX1013-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v0, v14
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v1, v15
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v16
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v17
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v20, v1
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v21, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v22, v3
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v23, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v24, v5
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v25, v7
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v26, v8
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v27, v9
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v28, v11
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v29, v12
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v30, v13
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v15
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v16
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v17
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v18
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[15:16]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:34], s[4:7]
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15_vgpr16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr21
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr22
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr23
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr24
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr25
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr26
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr27
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr28
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr29
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr30
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB8_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1030-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1030-NEXT: ; return to shader part epilog
|
|
|
|
|
;
|
|
|
|
|
; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v6, v7
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v7, v8
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v8, v9
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v11, v13
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v15
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v20, v16
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v19
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v20
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s6, v17
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s7, v18
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
|
|
|
|
|
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
|
|
|
|
|
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1013-NEXT: image_bvh64_intersect_ray v[21:24], v[0:15], s[4:7]
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr19_vgpr20
|
|
|
|
|
; GFX1013-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
|
|
|
|
|
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1013-NEXT: s_cbranch_execnz .LBB8_1
|
|
|
|
|
; GFX1013-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
|
; GFX1013-NEXT: s_waitcnt vmcnt(0)
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v0, v21
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v1, v22
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v23
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v24
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr) {
|
|
|
|
|
; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1030: ; %bb.0:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v15, v0
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v16, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v7
|
|
|
|
|
; GFX1030-NEXT: v_and_b32_e32 v1, s0, v9
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v17, v2
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v18, v3
|
|
|
|
|
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v9
|
|
|
|
|
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
|
|
|
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
|
|
|
; GFX1030-NEXT: v_and_b32_e32 v3, s0, v10
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v19, v4
|
|
|
|
|
; GFX1030-NEXT: v_mov_b32_e32 v20, v5
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v21, v7, s0, v0
|
|
|
|
|
; GFX1030-NEXT: v_and_or_b32 v22, v8, s0, v1
|
|
|
|
|
; GFX1030-NEXT: v_lshl_or_b32 v23, v3, 16, v2
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
|
|
|
|
|
; GFX1030-NEXT: v_readfirstlane_b32 s7, v14
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
|
|
|
|
|
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
|
|
|
|
|
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
|
|
|
|
|
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
|
|
|
|
|
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[15:30], s[4:7] a16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr15
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr16
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr17
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr18
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr19
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr20
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr21
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr22
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr23
|
|
|
|
|
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
|
|
|
|
|
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
|
|
|
; GFX1030-NEXT: s_cbranch_execnz .LBB9_1
|
|
|
|
|
; GFX1030-NEXT: ; %bb.2:
|
|
|
|
|
; GFX1030-NEXT: s_mov_b32 exec_lo, s1
|
|
|
|
@ -455,20 +423,20 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|
|
|
|
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
|
|
|
|
|
; GFX1013: ; %bb.0:
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s0, 0xffff
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v16, v11
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v6, 16, v7
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v11, s0, v9
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v9
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v10, s0, v10
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v17, v12
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v16, v10
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v17, v11
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v11, s0, v8
|
|
|
|
|
; GFX1013-NEXT: v_lshrrev_b32_e32 v8, 16, v8
|
|
|
|
|
; GFX1013-NEXT: v_and_b32_e32 v9, s0, v9
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v18, v12
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
|
|
|
; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v18, v13
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v14
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v19, v13
|
|
|
|
|
; GFX1013-NEXT: v_lshl_or_b32 v8, v9, 16, v8
|
|
|
|
|
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v6, v7, s0, v6
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v7, v8, s0, v11
|
|
|
|
|
; GFX1013-NEXT: v_lshl_or_b32 v8, v10, 16, v9
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v6, v6, s0, v10
|
|
|
|
|
; GFX1013-NEXT: v_and_or_b32 v7, v7, s0, v11
|
|
|
|
|
; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
|
|
|
|
|
; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
|
|
|
|
@ -493,7 +461,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v2, v22
|
|
|
|
|
; GFX1013-NEXT: v_mov_b32_e32 v3, v23
|
|
|
|
|
; GFX1013-NEXT: ; return to shader part epilog
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%r = bitcast <4 x i32> %v to <4 x float>
|
|
|
|
|
ret <4 x float> %r
|
|
|
|
|
}
|
|
|
|
@ -567,16 +535,16 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr
|
|
|
|
|
%node_ptr = load i32, i32* %gep_node_ptr, align 4
|
|
|
|
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
|
|
|
|
%ray_extent = load float, float* %gep_ray, align 4
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x float> undef, float 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x float> %ray_dir0, float 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x float> %ray_dir1, float 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x float> undef, float 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
store <4 x i32> %v, <4 x i32>* undef
|
|
|
|
|
ret void
|
|
|
|
|
}
|
|
|
|
@ -680,16 +648,16 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node
|
|
|
|
|
%node_ptr = load i32, i32* %gep_node_ptr, align 4
|
|
|
|
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
|
|
|
|
%ray_extent = load float, float* %gep_ray, align 4
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x half> undef, half 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x half> %ray_dir0, half 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x half> %ray_dir1, half 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x half> undef, half 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
store <4 x i32> %v, <4 x i32>* undef
|
|
|
|
|
ret void
|
|
|
|
|
}
|
|
|
|
@ -755,16 +723,16 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray,
|
|
|
|
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
|
|
|
|
%ray_extent = load float, float* %gep_ray, align 4
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x float> undef, float 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x float> %ray_dir0, float 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x float> %ray_dir1, float 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x float> undef, float 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x float> %ray_inv_dir0, float 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x float> %ray_inv_dir1, float 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x float> undef, float 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x float> %ray_dir0, float 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x float> %ray_dir1, float 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x float> undef, float 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
store <4 x i32> %v, <4 x i32>* undef
|
|
|
|
|
ret void
|
|
|
|
|
}
|
|
|
|
@ -860,16 +828,16 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_
|
|
|
|
|
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
|
%gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
|
|
|
|
|
%ray_extent = load float, float* %gep_ray, align 4
|
|
|
|
|
%ray_origin0 = insertelement <4 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <4 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <4 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <4 x half> undef, half 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <4 x half> %ray_dir0, half 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <4 x half> %ray_dir1, half 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <4 x half> undef, half 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <4 x half> %ray_inv_dir0, half 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <4 x half> %ray_inv_dir1, half 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
%ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
|
|
|
|
|
%ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
|
|
|
|
|
%ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
|
|
|
|
|
%ray_dir0 = insertelement <3 x half> undef, half 3.0, i32 0
|
|
|
|
|
%ray_dir1 = insertelement <3 x half> %ray_dir0, half 4.0, i32 1
|
|
|
|
|
%ray_dir = insertelement <3 x half> %ray_dir1, half 5.0, i32 2
|
|
|
|
|
%ray_inv_dir0 = insertelement <3 x half> undef, half 6.0, i32 0
|
|
|
|
|
%ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1
|
|
|
|
|
%ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2
|
|
|
|
|
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
|
|
|
|
|
store <4 x i32> %v, <4 x i32>* undef
|
|
|
|
|
ret void
|
|
|
|
|
}
|
|
|
|
|