forked from OSchip/llvm-project
R600/SI: Add patterns for v_cvt_{flr|rpi}_i32_f32
llvm-svn: 226230
This commit is contained in:
parent
c552c9abce
commit
eeb2a7e688
|
@ -438,6 +438,11 @@ def FP_ONE : PatLeaf <
|
||||||
[{return N->isExactlyValue(1.0);}]
|
[{return N->isExactlyValue(1.0);}]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// Pattern leaf that matches a floating-point immediate (fpimm) whose value is
// exactly 0.5.
def FP_HALF : PatLeaf <
|
||||||
|
(fpimm),
|
||||||
|
[{return N->isExactlyValue(0.5);}]
|
||||||
|
>;
|
||||||
|
|
||||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||||
|
|
||||||
let usesCustomInserter = 1 in {
|
let usesCustomInserter = 1 in {
|
||||||
|
@ -603,6 +608,18 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
|
||||||
// 24-bit arithmetic patterns
|
// 24-bit arithmetic patterns
|
||||||
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
|
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
|
||||||
|
|
||||||
|
// Special conversion patterns
|
||||||
|
|
||||||
|
// Matches fp_to_sint(ffloor(fadd(src, 0.5))), i.e. a signed conversion of
// floor(src + 0.5). The 0.5 addend must match FP_HALF exactly.
def cvt_rpi_i32_f32 : PatFrag <
|
||||||
|
(ops node:$src),
|
||||||
|
(fp_to_sint (ffloor (fadd $src, FP_HALF)))
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Matches fp_to_sint(ffloor(src)), i.e. a signed conversion of floor(src).
def cvt_flr_i32_f32 : PatFrag <
|
||||||
|
(ops node:$src),
|
||||||
|
(fp_to_sint (ffloor $src))
|
||||||
|
>;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
class UMUL24Pattern <Instruction UMUL24> : Pat <
|
class UMUL24Pattern <Instruction UMUL24> : Pat <
|
||||||
(mul U24:$x, U24:$y),
|
(mul U24:$x, U24:$y),
|
||||||
|
|
|
@ -1230,8 +1230,10 @@ defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
|
||||||
defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
|
defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
|
||||||
VOP_F32_I32, f16_to_fp
|
VOP_F32_I32, f16_to_fp
|
||||||
>;
|
>;
|
||||||
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
|
defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
|
||||||
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
|
VOP_I32_F32, cvt_rpi_i32_f32>;
|
||||||
|
defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
|
||||||
|
VOP_I32_F32, cvt_flr_i32_f32>;
|
||||||
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
|
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
|
||||||
defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
|
defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
|
||||||
VOP_F32_F64, fround
|
VOP_F32_F64, fround
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
|
|
||||||
|
declare float @llvm.fabs.f32(float) #1
|
||||||
|
declare float @llvm.floor.f32(float) #1
|
||||||
|
|
||||||
|
; Basic case: floor(x) followed by fptosi. The checks expect a single
; v_cvt_flr_i32_f32 (e32 encoding, scalar register source) and no add.
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
|
||||||
|
; SI-NOT: add
|
||||||
|
; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%floor = call float @llvm.floor.f32(float %x) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; The input is x + 1.0 (not x + 0.5). The checks expect the add to remain a
; separate v_add_f32 whose result feeds v_cvt_flr_i32_f32.
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
|
||||||
|
; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
|
||||||
|
; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%fadd = fadd float %x, 1.0
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; floor(fabs(x)) followed by fptosi. The checks expect the absolute value to
; appear as a |src| operand modifier on the e64 form of v_cvt_flr_i32_f32.
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
|
||||||
|
; SI-NOT: add
|
||||||
|
; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||||
|
%floor = call float @llvm.floor.f32(float %x.fabs) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; floor(-x) followed by fptosi, with the negation written as fsub -0.0, x.
; The checks expect a -src operand modifier on the e64 form of
; v_cvt_flr_i32_f32.
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
|
||||||
|
; SI-NOT: add
|
||||||
|
; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fneg = fsub float -0.000000e+00, %x
|
||||||
|
%floor = call float @llvm.floor.f32(float %x.fneg) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; floor(-fabs(x)) followed by fptosi. The checks expect both modifiers to be
; applied together as -|src| on the e64 form of v_cvt_flr_i32_f32.
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
|
||||||
|
; SI-NOT: add
|
||||||
|
; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||||
|
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
|
||||||
|
%floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Negative test: the conversion is unsigned (fptoui). The checks expect no
; v_cvt_flr_i32_f32, with separate v_floor_f32 and v_cvt_u32_f32 instead.
; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0:
|
||||||
|
; SI-NOT: v_cvt_flr_i32_f32
|
||||||
|
; SI: v_floor_f32
|
||||||
|
; SI: v_cvt_u32_f32_e32
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%floor = call float @llvm.floor.f32(float %x) #1
|
||||||
|
%cvt = fptoui float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
attributes #1 = { nounwind readnone }
|
|
@ -0,0 +1,76 @@
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
|
|
||||||
|
declare float @llvm.fabs.f32(float) #1
|
||||||
|
declare float @llvm.floor.f32(float) #1
|
||||||
|
|
||||||
|
; Basic case: floor(x + 0.5) followed by fptosi. The checks expect a single
; v_cvt_rpi_i32_f32 (e32 encoding, scalar register source).
; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
|
||||||
|
; SI: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%fadd = fadd float %x, 0.5
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; floor(fabs(x) + 0.5) followed by fptosi. The checks expect the absolute
; value to appear as a |src| operand modifier on the e64 form of
; v_cvt_rpi_i32_f32, with nothing else after it on that line.
; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
|
||||||
|
; SI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||||
|
%fadd = fadd float %x.fabs, 0.5
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: This doesn't select v_cvt_rpi_i32_f32 because the negated input and the 0.5 addend are combined into fsub 0.5, x
|
||||||
|
; floor(-x + 0.5) followed by fptosi. The active checks expect a v_sub_f32
; computing 0.5 - x whose result feeds v_cvt_flr_i32_f32. The XSI line shows
; the desired single-instruction output; XSI is not a prefix enabled by the
; RUN line.
; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
|
||||||
|
; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
|
||||||
|
; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
|
||||||
|
; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fneg = fsub float -0.000000e+00, %x
|
||||||
|
%fadd = fadd float %x.fneg, 0.5
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; FIXME: This doesn't work for the same reason as cvt_rpi_i32_f32_fneg: it forms fsub 0.5, |x|
|
||||||
|
; floor(-fabs(x) + 0.5) followed by fptosi. The active checks expect a
; v_sub_f32 computing 0.5 - |x| whose result feeds v_cvt_flr_i32_f32. The XSI
; line shows the desired single-instruction output; XSI is not a prefix
; enabled by the RUN line.
; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
|
||||||
|
; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
|
||||||
|
|
||||||
|
; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
|
||||||
|
; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||||
|
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
|
||||||
|
%fadd = fadd float %x.fabs.fneg, 0.5
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptosi float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Negative test: floor(x + 0.5) with an unsigned conversion (fptoui). The
; checks expect no v_cvt_rpi_i32_f32, with separate v_add_f32, v_floor_f32
; and v_cvt_u32_f32 instead.
; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0:
|
||||||
|
; SI-NOT: v_cvt_rpi_i32_f32
|
||||||
|
; SI: v_add_f32
|
||||||
|
; SI: v_floor_f32
|
||||||
|
; SI: v_cvt_u32_f32
|
||||||
|
; SI: s_endpgm
|
||||||
|
define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
|
||||||
|
%fadd = fadd float %x, 0.5
|
||||||
|
%floor = call float @llvm.floor.f32(float %fadd) #1
|
||||||
|
%cvt = fptoui float %floor to i32
|
||||||
|
store i32 %cvt, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue