forked from OSchip/llvm-project
R600/SI: Add initial double precision support for SI
Patch by: Niels Ole Salscheider
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
llvm-svn: 186177
This commit is contained in:
parent
2d17f67651
commit
7512c0803c
|
@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
|||
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::f64, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::f32, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::f64, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::i64, Expand);
|
||||
|
||||
setOperationAction(ISD::UDIV, MVT::i32, Expand);
|
||||
|
|
|
@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||
|
||||
addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
|
||||
addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||
|
|
|
@ -663,7 +663,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 <
|
|||
[(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
|
||||
>;
|
||||
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
|
||||
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
|
||||
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
|
||||
[(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
|
||||
>;
|
||||
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
|
||||
defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
|
||||
defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
|
||||
|
@ -1008,10 +1010,25 @@ def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
|
|||
>;
|
||||
def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
|
||||
def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
|
||||
def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
|
||||
def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
|
||||
def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
|
||||
|
||||
} // isCommutable = 1
|
||||
|
||||
def : Pat <
|
||||
(fadd f64:$src0, f64:$src1),
|
||||
(V_ADD_F64 $src0, $src1, (i64 0))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(fmul f64:$src0, f64:$src1),
|
||||
(V_MUL_F64 $src0, $src1, (i64 0))
|
||||
>;
|
||||
|
||||
def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
|
@ -1434,6 +1451,10 @@ def : BitConvert <i32, f32, VReg_32>;
|
|||
def : BitConvert <f32, i32, SReg_32>;
|
||||
def : BitConvert <f32, i32, VReg_32>;
|
||||
|
||||
def : BitConvert <i64, f64, VReg_64>;
|
||||
|
||||
def : BitConvert <f64, i64, VReg_64>;
|
||||
|
||||
/********** =================== **********/
|
||||
/********** Src & Dst modifiers **********/
|
||||
/********** =================== **********/
|
||||
|
@ -1522,6 +1543,11 @@ def : Pat<
|
|||
(V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(fdiv f64:$src0, f64:$src1),
|
||||
(V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(fcos f32:$src0),
|
||||
(V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
||||
|
@ -1672,6 +1698,8 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
|
|||
>;
|
||||
}
|
||||
|
||||
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
|
||||
global_load, constant_load>;
|
||||
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
|
||||
global_load, constant_load>;
|
||||
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
|
||||
|
||||
; CHECK: @fadd_f64
|
||||
; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
|
||||
|
||||
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
%r0 = load double addrspace(1)* %in1
|
||||
%r1 = load double addrspace(1)* %in2
|
||||
%r2 = fadd double %r0, %r1
|
||||
store double %r2, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
|
||||
|
||||
; CHECK: @fdiv_f64
|
||||
; CHECK: V_RCP_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+}}
|
||||
; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
|
||||
|
||||
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
%r0 = load double addrspace(1)* %in1
|
||||
%r1 = load double addrspace(1)* %in2
|
||||
%r2 = fdiv double %r0, %r1
|
||||
store double %r2, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
|
||||
|
||||
; CHECK: @fmul_f64
|
||||
; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
|
||||
|
||||
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
%r0 = load double addrspace(1)* %in1
|
||||
%r1 = load double addrspace(1)* %in2
|
||||
%r2 = fmul double %r0, %r1
|
||||
store double %r2, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
|
||||
|
||||
; Load a f64 value from the global address space.
|
||||
; CHECK: @load_f64
|
||||
; CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}}
|
||||
define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = load double addrspace(1)* %in
|
||||
store double %0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Load a f64 value from the constant address space.
|
||||
; CHECK: @load_const_addrspace_f64
|
||||
; CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]+}}
|
||||
define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
|
||||
%1 = load double addrspace(2)* %in
|
||||
store double %1, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue