forked from OSchip/llvm-project
AMDGPU : Add intrinsic for instruction v_cvt_pk_u8_f32
Differential Revision: http://reviews.llvm.org/D23336 llvm-svn: 278403
This commit is contained in:
parent
3ab5816000
commit
70cda07526
|
@ -530,6 +530,10 @@ def int_amdgcn_mqsad_u32_u8 :
|
|||
GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_amdgcn_cvt_pk_u8_f32 :
|
||||
GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_amdgcn_icmp :
|
||||
Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem, IntrConvergent]>;
|
||||
|
|
|
@ -1655,6 +1655,7 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
|
|||
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
|
||||
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
|
||||
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
|
||||
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
|
||||
|
||||
// This class is used only with VOPC instructions. Use $sdst for out operand
|
||||
class SIInstAlias <string asm, Instruction inst, VOPProfile p> :
|
||||
|
|
|
@ -1615,6 +1615,11 @@ defm V_SAD_U16 : VOP3Inst <vop3<0x15c, 0x1db>, "v_sad_u16",
|
|||
defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
|
||||
defm V_CVT_PK_U8_F32 : VOP3Inst<vop3<0x15e, 0x1dd>, "v_cvt_pk_u8_f32",
|
||||
VOP_I32_F32_I32_I32, int_amdgcn_cvt_pk_u8_f32
|
||||
>;
|
||||
|
||||
//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
|
||||
defm V_DIV_FIXUP_F32 : VOP3Inst <
|
||||
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) #0
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_0:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_idx_0(i32 addrspace(1)* %out, float %src, i32 %reg) {
|
||||
%result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_1:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_idx_1(i32 addrspace(1)* %out, float %src, i32 %reg) {
|
||||
%result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %reg) #0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_2:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_idx_2(i32 addrspace(1)* %out, float %src, i32 %reg) {
|
||||
%result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %reg) #0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_3:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_idx_3(i32 addrspace(1)* %out, float %src, i32 %reg) {
|
||||
%result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %reg) #0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_combine:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_combine(i32 addrspace(1)* %out, float %src, i32 %reg) {
|
||||
%result0 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
|
||||
%result1 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %result0) #0
|
||||
%result2 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %result1) #0
|
||||
%result3 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %result2) #0
|
||||
store i32 %result3, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx:
|
||||
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
define void @v_cvt_pk_u8_f32_idx(i32 addrspace(1)* %out, float %src, i32 %idx, i32 %reg) {
|
||||
%result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 %idx, i32 %reg) #0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
Loading…
Reference in New Issue