[AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy

This follows on from D89558 which added the new intrinsic and D88955
which added similar combines for llvm.amdgcn.fmul.legacy.

Differential Revision: https://reviews.llvm.org/D90028
This commit is contained in:
Jay Foad 2020-10-23 12:52:14 +01:00
parent 4f7ee55971
commit 958130dfda
4 changed files with 147 additions and 14 deletions

View File

@@ -163,6 +163,27 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
return IC.replaceInstUsesWith(II, NewCall);
}
bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
InstCombiner &IC) const {
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
// infinity, gives +0.0. If we can prove we don't have one of the special
// cases then we can use a normal multiply instead.
// TODO: Create and use isKnownFiniteNonZero instead of just matching
// constants here.
if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
match(Op1, PatternMatch::m_FiniteNonZero())) {
// One operand is not zero or infinity or NaN.
return true;
}
auto *TLI = &IC.getTargetLibraryInfo();
if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
// Neither operand is infinity or NaN.
return true;
}
return false;
}
Optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
@@ -836,26 +857,40 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// If we can prove we don't have one of the special cases then we can use a
// normal fmul instruction instead.
auto *TLI = &IC.getTargetLibraryInfo();
bool CanSimplifyToMul = false;
// TODO: Create and use isKnownFiniteNonZero instead of just matching
// constants here.
if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
match(Op1, PatternMatch::m_FiniteNonZero())) {
// One operand is not zero or infinity or NaN.
CanSimplifyToMul = true;
} else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
// Neither operand is infinity or NaN.
CanSimplifyToMul = true;
}
if (CanSimplifyToMul) {
if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
FMul->takeName(&II);
return IC.replaceInstUsesWith(II, FMul);
}
break;
}
// fma_legacy(a, b, c) is a*b + c, except the multiply uses the legacy
// rule below for zero operands.
case Intrinsic::amdgcn_fma_legacy: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *Op2 = II.getArgOperand(2);
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
// infinity, gives +0.0.
// TODO: Move to InstSimplify?
if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
match(Op1, PatternMatch::m_AnyZeroFP())) {
// It's tempting to just return Op2 here, but that would give the wrong
// result if Op2 was -0.0.
auto *Zero = ConstantFP::getNullValue(II.getType());
// CreateFAddFMF copies the call's fast-math flags onto the new fadd.
auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
FAdd->takeName(&II);
return IC.replaceInstUsesWith(II, FAdd);
}
// If we can prove we don't have one of the special cases then we can use a
// normal fma instead.
if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
// Rewrite the call in place to target llvm.fma with the same operands
// and flags; returning &II tells InstCombine the instruction changed.
II.setCalledOperand(Intrinsic::getDeclaration(
II.getModule(), Intrinsic::fma, II.getType()));
return &II;
}
break;
}
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {

View File

@@ -227,6 +227,8 @@ public:
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
InstCombiner &IC) const;
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(

View File

@@ -0,0 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
; Op1 is +0.0, and legacy semantics say +/-0.0 * anything (even NaN/inf) is
; +0.0, so the call becomes +0.0 + z. The fadd is kept (rather than returning
; z directly) so a -0.0 %z still produces +0.0.
define float @test_zero(float %x, float %z) {
; CHECK-LABEL: @test_zero(
; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
; CHECK-NEXT: ret float [[CALL]]
;
%call = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
ret float %call
}
; Same fold as test_zero, but the call's fast-math flags ("contract") must be
; propagated onto the replacement fadd.
define float @test_zero_fmf(float %x, float %z) {
; CHECK-LABEL: @test_zero_fmf(
; CHECK-NEXT: [[CALL:%.*]] = fadd contract float [[Z:%.*]], 0.000000e+00
; CHECK-NEXT: ret float [[CALL]]
;
%call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
ret float %call
}
; With nsz the sign of a zero result doesn't matter, so the +0.0 + z add is
; folded away entirely, leaving just z.
define float @test_zero_nsz(float %x, float %z) {
; CHECK-LABEL: @test_zero_nsz(
; CHECK-NEXT: ret float [[Z:%.*]]
;
%call = call nsz float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
ret float %call
}
; Op0 is -0.0; the legacy multiply gives +0.0 regardless of %y, so this also
; simplifies to +0.0 + z (a negative zero on either multiplicand counts).
define float @test_negzero(float %y, float %z) {
; CHECK-LABEL: @test_negzero(
; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
; CHECK-NEXT: ret float [[CALL]]
;
%call = call float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
ret float %call
}
; As test_negzero, but nsz lets the +0.0 + z add fold away, returning z.
define float @test_negzero_nsz(float %y, float %z) {
; CHECK-LABEL: @test_negzero_nsz(
; CHECK-NEXT: ret float [[Z:%.*]]
;
%call = call nsz float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
ret float %call
}
; 99.5 is a finite non-zero constant, so the legacy zero special case cannot
; fire and the call is rewritten to plain llvm.fma.f32.
define float @test_const(float %x, float %z) {
; CHECK-LABEL: @test_const(
; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
; CHECK-NEXT: ret float [[CALL]]
;
%call = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
ret float %call
}
; As test_const, and the "contract" flag must survive on the rewritten
; llvm.fma.f32 call.
define float @test_const_fmf(float %x, float %z) {
; CHECK-LABEL: @test_const_fmf(
; CHECK-NEXT: [[CALL:%.*]] = call contract float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
; CHECK-NEXT: ret float [[CALL]]
;
%call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
ret float %call
}
; sitofp from i32 always produces a finite, non-NaN float, so neither
; multiplicand can be infinity or NaN and the legacy call combines to fma
; (a zero product still behaves identically here, since +0.0*finite == +0.0).
define float @test_finite(i32 %x, i32 %y, float %z) {
; CHECK-LABEL: @test_finite(
; CHECK-NEXT: [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
; CHECK-NEXT: [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[XF]], float [[YF]], float [[Z:%.*]])
; CHECK-NEXT: ret float [[CALL]]
;
%xf = sitofp i32 %x to float
%yf = sitofp i32 %y to float
%call = call float @llvm.amdgcn.fma.legacy(float %xf, float %yf, float %z)
ret float %call
}
declare float @llvm.amdgcn.fma.legacy(float, float, float)

View File

@@ -29,6 +29,16 @@ define float @test_const(float %x) {
ret float %call
}
; 99.5 is a finite non-zero constant, so fmul.legacy combines to a plain
; fmul; the call's "contract" fast-math flag must be preserved on it.
define float @test_const_fmf(float %x) {
; CHECK-LABEL: @test_const_fmf(
; CHECK-NEXT: [[CALL:%.*]] = fmul contract float [[X:%.*]], 9.950000e+01
; CHECK-NEXT: ret float [[CALL]]
;
%call = call contract float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
ret float %call
}
; Combine to fmul because neither argument can be infinity or NaN.
define float @test_finite(i32 %x, i32 %y) {
; CHECK-LABEL: @test_finite(