From 53f0cc238c5b8b049d9c41725907bfae105d9cc4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 26 Jan 2017 01:25:36 +0000 Subject: [PATCH] AMDGPU: Fold fneg into round instructions llvm-svn: 293127 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 8 +- llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 90 ++++++++++++++++++- llvm/test/CodeGen/AMDGPU/frem.ll | 16 ++-- .../CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll | 4 +- 4 files changed, 106 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d92178328ff0..c9c44bf9d809 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -492,6 +492,9 @@ static bool fnegFoldsIntoOp(unsigned Opc) { case ISD::FMA: case ISD::FMAD: case ISD::FSIN: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::SIN_HW: @@ -2924,9 +2927,12 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, return Res; } case ISD::FP_EXTEND: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: // XXX - Should fround be handled? + case ISD::FSIN: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: - case ISD::FSIN: case AMDGPUISD::SIN_HW: { SDValue CvtSrc = N0.getOperand(0); if (CvtSrc.getOpcode() == ISD::FNEG) { diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 040f1eceed58..37115e795e8d 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1327,7 +1327,91 @@ define void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.amdgcn.sin.f32(float %a) - %fneg = fsub float -0.000000e+00, %sin + %fneg = fsub float -0.0, %sin + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +; -------------------------------------------------------------------------------- +; ftrunc tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_trunc_f32: +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]] +; GCN: buffer_store_dword [[RESULT]] +define void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %a = load volatile float, float addrspace(1)* %a.gep + %trunc = call float @llvm.trunc.f32(float %a) + %fneg = fsub float -0.0, %trunc + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +; -------------------------------------------------------------------------------- +; fround tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_round_f32: +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: v_trunc_f32_e32 +; GCN: v_subrev_f32_e32 +; GCN: v_cndmask_b32 +; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}} +; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, v{{[0-9]+}} +; GCN: buffer_store_dword [[RESULT]] +define void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %a = load volatile float, float addrspace(1)* %a.gep + %round = call float @llvm.round.f32(float %a) + %fneg = fsub float -0.0, %round + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +; -------------------------------------------------------------------------------- +; rint tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_rint_f32: +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]] +; GCN: buffer_store_dword [[RESULT]] +define void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %a = load volatile float, float addrspace(1)* %a.gep + %rint = call float @llvm.rint.f32(float %a) + %fneg = fsub float -0.0, %rint + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +; -------------------------------------------------------------------------------- +; nearbyint tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_nearbyint_f32: +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]] +; GCN: buffer_store_dword [[RESULT]] +define void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %a = load volatile float, float addrspace(1)* %a.gep + %nearbyint = call float @llvm.nearbyint.f32(float %a) + %fneg = fsub float -0.0, %nearbyint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1336,6 +1420,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fma.f32(float, float, float) #1 declare float @llvm.fmuladd.f32(float, float, float) #1 declare float @llvm.sin.f32(float) #1 +declare float @llvm.trunc.f32(float) #1 +declare float @llvm.round.f32(float) #1 +declare float @llvm.rint.f32(float) #1 +declare float @llvm.nearbyint.f32(float) #1 declare float @llvm.amdgcn.sin.f32(float) #1 declare float @llvm.amdgcn.rcp.f32(float) #1 diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 039623c02194..97533c418c94 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} @@ -12,8 +12,8 @@ ; GCN: v_mul_f32_e32 ; GCN: v_div_fmas_f32 ; GCN: v_div_fixup_f32 -; GCN: v_trunc_f32_e32 -; GCN: v_mad_f32 +; GCN: v_trunc_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}} +; GCN: v_mac_f32_e32 ; GCN: s_endpgm define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { @@ -28,11 +28,11 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; FUNC-LABEL: {{^}}unsafe_frem_f32: ; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 ; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}} -; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] +; GCN: v_rcp_f32_e64 [[INVY:v[0-9]+]], -[[Y]] ; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] ; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] -; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] -; GCN: buffer_store_dword [[RESULT]] +; GCN: v_mac_f32_e32 [[X]], [[Y]], [[TRUNC]] +; GCN: buffer_store_dword [[X]] ; GCN: s_endpgm define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll index 011a0fdbd219..3798c46677f0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll @@ -46,8 +46,8 @@ entry: ; GCN-LABEL: {{^}}class_f16_fneg ; GCN: s_load_dword s[[SA_F16:[0-9]+]] ; GCN: s_load_dword s[[SB_I32:[0-9]+]] -; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] -; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[VA_F16]], s[[SB_I32]] +; VI: v_trunc_f16_e64 v[[VA_F16:[0-9]+]], -s[[SA_F16]] +; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], s[[SB_I32]] ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm