forked from OSchip/llvm-project
AMDGPU: Remove llvm.AMDGPU.cube intrinsic
llvm-svn: 295359
This commit is contained in:
parent
eb65cda986
commit
b95ddd7cea
|
@ -64,6 +64,10 @@ def int_r600_recipsqrt_clamped : Intrinsic<
|
|||
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||
>;
|
||||
|
||||
def int_r600_cube : Intrinsic<
|
||||
[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
|
||||
>;
|
||||
|
||||
} // End TargetPrefix = "r600"
|
||||
|
||||
let TargetPrefix = "amdgcn" in {
|
||||
|
|
|
@ -20,9 +20,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
|||
// Deprecated in favor of llvm.amdgcn.sffbh
|
||||
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
// Deprecated in favor of separate int_amdgcn_cube* intrinsics.
|
||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
|
||||
// Deprecated in favor of expanded bit operations
|
||||
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -1013,7 +1013,7 @@ multiclass CUBE_Common <bits<11> inst> {
|
|||
(outs R600_Reg128:$dst),
|
||||
(ins R600_Reg128:$src0),
|
||||
"CUBE $dst $src0",
|
||||
[(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
|
||||
[(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
|
||||
VecALU
|
||||
> {
|
||||
let isPseudo = 1;
|
||||
|
|
|
@ -817,27 +817,6 @@ def : Pat <
|
|||
|
||||
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_cube v4f32:$src),
|
||||
(REG_SEQUENCE VReg_128,
|
||||
(V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
|
||||
0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
|
||||
0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
|
||||
0 /* clamp */, 0 /* omod */), sub0,
|
||||
(V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
|
||||
0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
|
||||
0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
|
||||
0 /* clamp */, 0 /* omod */), sub1,
|
||||
(V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
|
||||
0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
|
||||
0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
|
||||
0 /* clamp */, 0 /* omod */), sub2,
|
||||
(V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
|
||||
0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
|
||||
0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
|
||||
0 /* clamp */, 0 /* omod */), sub3)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i32 (sext i1:$src0)),
|
||||
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
|
||||
|
|
|
@ -6,9 +6,6 @@ declare float @llvm.amdgcn.cubesc(float, float, float) #0
|
|||
declare float @llvm.amdgcn.cubetc(float, float, float) #0
|
||||
declare float @llvm.amdgcn.cubema(float, float, float) #0
|
||||
|
||||
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}cube:
|
||||
; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
|
@ -29,18 +26,5 @@ define void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c)
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}legacy_cube:
|
||||
; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: _store_dwordx4
|
||||
define void @legacy_cube(<4 x float> addrspace(1)* %out, <4 x float> %abcx) #1 {
|
||||
%cube = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %abcx)
|
||||
store <4 x float> %cube, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ main_body:
|
|||
%tmp12 = insertelement <4 x float> %tmp11, float %tmp7, i32 1
|
||||
%tmp13 = insertelement <4 x float> %tmp12, float %tmp10, i32 2
|
||||
%tmp14 = insertelement <4 x float> %tmp13, float 1.000000e+00, i32 3
|
||||
%tmp15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp14)
|
||||
%tmp15 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp14)
|
||||
%tmp16 = extractelement <4 x float> %tmp15, i32 0
|
||||
%tmp17 = extractelement <4 x float> %tmp15, i32 1
|
||||
%tmp18 = extractelement <4 x float> %tmp15, i32 2
|
||||
|
@ -44,7 +44,7 @@ main_body:
|
|||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
|
||||
declare <4 x float> @llvm.r600.cube(<4 x float>) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) #0
|
|
@ -10,7 +10,7 @@ main_body:
|
|||
%tmp6 = insertelement <4 x float> %tmp5, float %tmp2, i32 1
|
||||
%tmp7 = insertelement <4 x float> %tmp6, float %tmp3, i32 2
|
||||
%tmp8 = insertelement <4 x float> %tmp7, float %tmp4, i32 3
|
||||
%tmp9 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp8)
|
||||
%tmp9 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp8)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp11 = extractelement <4 x float> %tmp9, i32 1
|
||||
%tmp12 = extractelement <4 x float> %tmp9, i32 2
|
||||
|
@ -45,7 +45,7 @@ main_body:
|
|||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
|
||||
declare <4 x float> @llvm.r600.cube(<4 x float>) #0
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @fabs(float) #0
|
||||
|
|
|
@ -586,7 +586,19 @@ IF67: ; preds = %LOOP65
|
|||
%tmp449 = insertelement <4 x float> %tmp448, float %tmp445, i32 1
|
||||
%tmp450 = insertelement <4 x float> %tmp449, float %tmp447, i32 2
|
||||
%tmp451 = insertelement <4 x float> %tmp450, float %tmp194, i32 3
|
||||
%tmp452 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp451)
|
||||
|
||||
%tmp451.x = extractelement <4 x float> %tmp451, i32 0
|
||||
%tmp451.y = extractelement <4 x float> %tmp451, i32 1
|
||||
%tmp451.z = extractelement <4 x float> %tmp451, i32 2
|
||||
%cubetc = call float @llvm.amdgcn.cubetc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
|
||||
%cubesc = call float @llvm.amdgcn.cubesc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
|
||||
%cubema = call float @llvm.amdgcn.cubema(float %tmp451.x, float %tmp451.y, float %tmp451.z)
|
||||
%cubeid = call float @llvm.amdgcn.cubeid(float %tmp451.x, float %tmp451.y, float %tmp451.z)
|
||||
%tmp452.0 = insertelement <4 x float> undef, float %cubetc, i32 0
|
||||
%tmp452.1 = insertelement <4 x float> %tmp452.0, float %cubesc, i32 1
|
||||
%tmp452.2 = insertelement <4 x float> %tmp452.1, float %cubema, i32 2
|
||||
%tmp452 = insertelement <4 x float> %tmp452.2, float %cubeid, i32 3
|
||||
|
||||
%tmp453 = extractelement <4 x float> %tmp452, i32 0
|
||||
%tmp454 = extractelement <4 x float> %tmp452, i32 1
|
||||
%tmp455 = extractelement <4 x float> %tmp452, i32 2
|
||||
|
@ -1840,9 +1852,6 @@ declare float @llvm.amdgcn.rsq.f32(float) #0
|
|||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
|
||||
|
@ -1863,6 +1872,11 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
|
|||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
|
||||
|
||||
declare float @llvm.amdgcn.cubeid(float, float, float) #0
|
||||
declare float @llvm.amdgcn.cubesc(float, float, float) #0
|
||||
declare float @llvm.amdgcn.cubetc(float, float, float) #0
|
||||
declare float @llvm.amdgcn.cubema(float, float, float) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
|
|
Loading…
Reference in New Issue