forked from OSchip/llvm-project
AMDGPU/EG,CM: Add fp16 conversion instructions
Differential Revision: https://reviews.llvm.org/D28164 llvm-svn: 291622
This commit is contained in:
parent
acd6360251
commit
0d6cb1caaf
|
@ -333,11 +333,13 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
|
|||
// ALU instruction definitions shown in the EG/CM hunk of this change.
// Each def passes an opcode value and, for the *_Helper forms, a mnemonic
// string and an operator name to a class declared outside this view.
def DOT4_eg : DOT4_Common<0xBE>;
|
||||
defm CUBE_eg : CUBE_Common<0xC0>;
|
||||
|
||||
// NOTE(review): BCNT_INT is listed again further down with identical text.
// This looks like the removed/added pair of a line that the diff moved, with
// the +/- markers lost in this rendering -- confirm against the real file
// before treating it as a duplicate definition.
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
|
||||
|
||||
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
|
||||
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
|
||||
|
||||
// fp16 conversions: 0xA2 is paired with the fp_to_f16 operator and 0xA3 with
// f16_to_fp. Presumably these two defs are the lines this commit adds (the
// hunk grows from 11 to 13 lines) -- verify against the upstream revision.
def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", fp_to_f16, VecALU>;
|
||||
def FLT16_TO_FLT32 : R600_1OP_Helper <0xA3, "FLT16_TO_FLT32", f16_to_fp, VecALU>;
|
||||
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
|
||||
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
|
||||
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
|
||||
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
|
||||
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
|
||||
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
|
||||
; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; Test input: reads one i16 from %in, passes it to the
; llvm.convert.from.fp16.f32 intrinsic and writes the float result to %out.
; Both pointers are addrspace(1) and marked noalias.
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
|
||||
; Raw 16-bit input, loaded with 2-byte alignment.
%val = load i16, i16 addrspace(1)* %in, align 2
|
||||
; The conversion under test.
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
|
||||
store float %cvt, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
|
||||
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
|
||||
; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
|
||||
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; Test input: reads one i16 from %in, passes it to the
; llvm.convert.from.fp16.f64 intrinsic and writes the double result to %out.
; Both pointers are addrspace(1) and marked noalias.
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
|
||||
; Raw 16-bit input, loaded with 2-byte alignment.
%val = load i16, i16 addrspace(1)* %in, align 2
|
||||
; The conversion under test.
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
|
||||
; NOTE(review): the double is stored with align 4, not 8 -- confirm the
; under-alignment is intentional for this test.
store double %cvt, double addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}test_convert_fp16_to_fp32:
|
||||
; GCN: buffer_load_ushort [[VAL:v[0-9]+]]
|
||||
; GCN: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[RES:T[0-9]+\.[XYZW]]]
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RES:T[0-9]+\.[XYZW]]]
|
||||
; EGCM: VTX_READ_16 [[VAL:T[0-9]+\.[XYZW]]]
|
||||
; EGCM: FLT16_TO_FLT32{{[ *]*}}[[RES]], [[VAL]]
|
||||
; Test input: reads one i16 from %in, passes it to the
; llvm.convert.from.fp16.f32 intrinsic and writes the float result to %out.
; Both pointers are addrspace(1) and marked noalias. The check lines placed
; before this function describe the expected load / convert / store sequence
; for each RUN configuration.
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
|
||||
; Raw 16-bit input, loaded with 2-byte alignment.
%val = load i16, i16 addrspace(1)* %in, align 2
|
||||
; The conversion under test.
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
|
||||
store float %cvt, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}test_convert_fp16_to_fp64:
|
||||
; GCN: buffer_load_ushort [[VAL:v[0-9]+]]
|
||||
; GCN: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
|
||||
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]]
|
||||
; Test input: reads one i16 from %in, passes it to the
; llvm.convert.from.fp16.f64 intrinsic and writes the double result to %out.
; Both pointers are addrspace(1) and marked noalias. The check lines placed
; before this function expect the conversion to go through a 32-bit float
; intermediate (f16 -> f32 -> f64) before a two-dword store.
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
|
||||
; Raw 16-bit input, loaded with 2-byte alignment.
%val = load i16, i16 addrspace(1)* %in, align 2
|
||||
; The conversion under test.
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
|
||||
; NOTE(review): the double is stored with align 4, not 8 -- confirm the
; under-alignment is intentional for this test.
store double %cvt, double addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
|
@ -1,12 +1,17 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_short [[RESULT]]
|
||||
; FUNC-LABEL: {{^}}test_convert_fp32_to_fp16:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_short [[RESULT]]
|
||||
|
||||
; EG: MEM_RAT MSKOR
|
||||
; EG: VTX_READ_32
|
||||
; EG: FLT32_TO_FLT16
|
||||
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%val = load float, float addrspace(1)* %in, align 4
|
||||
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue