forked from OSchip/llvm-project
[AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy
This follows on from D89558, which added the new intrinsic, and D88955, which added similar combines for llvm.amdgcn.fmul.legacy. Differential Revision: https://reviews.llvm.org/D90028
This commit is contained in:
parent
4f7ee55971
commit
958130dfda
llvm
lib/Target/AMDGPU
test/Transforms/InstCombine/AMDGPU
|
@ -163,6 +163,27 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
|
||||||
return IC.replaceInstUsesWith(II, NewCall);
|
return IC.replaceInstUsesWith(II, NewCall);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
|
||||||
|
InstCombiner &IC) const {
|
||||||
|
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
|
||||||
|
// infinity, gives +0.0. If we can prove we don't have one of the special
|
||||||
|
// cases then we can use a normal multiply instead.
|
||||||
|
// TODO: Create and use isKnownFiniteNonZero instead of just matching
|
||||||
|
// constants here.
|
||||||
|
if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
|
||||||
|
match(Op1, PatternMatch::m_FiniteNonZero())) {
|
||||||
|
// One operand is not zero or infinity or NaN.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto *TLI = &IC.getTargetLibraryInfo();
|
||||||
|
if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
|
||||||
|
isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
|
||||||
|
// Neither operand is infinity or NaN.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
Optional<Instruction *>
|
Optional<Instruction *>
|
||||||
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
||||||
Intrinsic::ID IID = II.getIntrinsicID();
|
Intrinsic::ID IID = II.getIntrinsicID();
|
||||||
|
@ -836,26 +857,40 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
||||||
|
|
||||||
// If we can prove we don't have one of the special cases then we can use a
|
// If we can prove we don't have one of the special cases then we can use a
|
||||||
// normal fmul instruction instead.
|
// normal fmul instruction instead.
|
||||||
auto *TLI = &IC.getTargetLibraryInfo();
|
if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
|
||||||
bool CanSimplifyToMul = false;
|
|
||||||
// TODO: Create and use isKnownFiniteNonZero instead of just matching
|
|
||||||
// constants here.
|
|
||||||
if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
|
|
||||||
match(Op1, PatternMatch::m_FiniteNonZero())) {
|
|
||||||
// One operand is not zero or infinity or NaN.
|
|
||||||
CanSimplifyToMul = true;
|
|
||||||
} else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
|
|
||||||
isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
|
|
||||||
// Neither operand is infinity or NaN.
|
|
||||||
CanSimplifyToMul = true;
|
|
||||||
}
|
|
||||||
if (CanSimplifyToMul) {
|
|
||||||
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
|
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
|
||||||
FMul->takeName(&II);
|
FMul->takeName(&II);
|
||||||
return IC.replaceInstUsesWith(II, FMul);
|
return IC.replaceInstUsesWith(II, FMul);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_fma_legacy: {
|
||||||
|
Value *Op0 = II.getArgOperand(0);
|
||||||
|
Value *Op1 = II.getArgOperand(1);
|
||||||
|
Value *Op2 = II.getArgOperand(2);
|
||||||
|
|
||||||
|
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
|
||||||
|
// infinity, gives +0.0.
|
||||||
|
// TODO: Move to InstSimplify?
|
||||||
|
if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
|
||||||
|
match(Op1, PatternMatch::m_AnyZeroFP())) {
|
||||||
|
// It's tempting to just return Op2 here, but that would give the wrong
|
||||||
|
// result if Op2 was -0.0.
|
||||||
|
auto *Zero = ConstantFP::getNullValue(II.getType());
|
||||||
|
auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
|
||||||
|
FAdd->takeName(&II);
|
||||||
|
return IC.replaceInstUsesWith(II, FAdd);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we can prove we don't have one of the special cases then we can use a
|
||||||
|
// normal fma instead.
|
||||||
|
if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
|
||||||
|
II.setCalledOperand(Intrinsic::getDeclaration(
|
||||||
|
II.getModule(), Intrinsic::fma, II.getType()));
|
||||||
|
return &II;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default: {
|
default: {
|
||||||
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
|
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
|
||||||
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
|
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
|
||||||
|
|
|
@ -227,6 +227,8 @@ public:
|
||||||
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
|
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
|
||||||
Value *NewV) const;
|
Value *NewV) const;
|
||||||
|
|
||||||
|
/// Returns true if a legacy multiply of \p Op0 and \p Op1 can be replaced by a
/// normal multiply: either operand matches a finite, non-zero constant, or
/// both operands are known to be neither infinity nor NaN. \p IC supplies the
/// TargetLibraryInfo used for those queries.
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
|
||||||
|
InstCombiner &IC) const;
|
||||||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||||
IntrinsicInst &II) const;
|
IntrinsicInst &II) const;
|
||||||
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
|
||||||
|
|
||||||
|
; The second multiplicand is the constant 0.0, so the legacy product is +0.0
; whatever %x is. Simplify to +0.0 + z (not plain z, which would give the
; wrong result for z == -0.0).
|
||||||
|
define float @test_zero(float %x, float %z) {
|
||||||
|
; CHECK-LABEL: @test_zero(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; A zero multiplicand simplifies the call to +0.0 + z; the fast-math flags on
; the call (here: contract) must be preserved on the resulting fadd.
|
||||||
|
define float @test_zero_fmf(float %x, float %z) {
|
||||||
|
; CHECK-LABEL: @test_zero_fmf(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = fadd contract float [[Z:%.*]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; A zero multiplicand with nsz on the call: the sign of a zero result may be
; ignored, so the +0.0 + z add folds away and the call simplifies to z.
|
||||||
|
define float @test_zero_nsz(float %x, float %z) {
|
||||||
|
; CHECK-LABEL: @test_zero_nsz(
|
||||||
|
; CHECK-NEXT: ret float [[Z:%.*]]
|
||||||
|
;
|
||||||
|
%call = call nsz float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; A -0.0 multiplicand is treated like +0.0: the legacy product is still +0.0,
; so simplify to +0.0 + z.
|
||||||
|
define float @test_negzero(float %y, float %z) {
|
||||||
|
; CHECK-LABEL: @test_negzero(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; A -0.0 multiplicand with nsz on the call: the sign of a zero result may be
; ignored, so the +0.0 + z add folds away and the call simplifies to z.
|
||||||
|
define float @test_negzero_nsz(float %y, float %z) {
|
||||||
|
; CHECK-LABEL: @test_negzero_nsz(
|
||||||
|
; CHECK-NEXT: ret float [[Z:%.*]]
|
||||||
|
;
|
||||||
|
%call = call nsz float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Combine to a normal llvm.fma because one multiplicand (99.5) is a finite,
; non-zero constant.
|
||||||
|
define float @test_const(float %x, float %z) {
|
||||||
|
; CHECK-LABEL: @test_const(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Combine to a normal llvm.fma because one multiplicand (99.5) is a finite,
; non-zero constant; the fast-math flags on the call (here: contract) must be
; preserved on the new call.
|
||||||
|
define float @test_const_fmf(float %x, float %z) {
|
||||||
|
; CHECK-LABEL: @test_const_fmf(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = call contract float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Combine to a normal llvm.fma because both multiplicands are produced by
; sitofp from i32, so neither of them can be infinity or NaN.
|
||||||
|
define float @test_finite(i32 %x, i32 %y, float %z) {
|
||||||
|
; CHECK-LABEL: @test_finite(
|
||||||
|
; CHECK-NEXT: [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
|
||||||
|
; CHECK-NEXT: [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[XF]], float [[YF]], float [[Z:%.*]])
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%xf = sitofp i32 %x to float
|
||||||
|
%yf = sitofp i32 %y to float
|
||||||
|
%call = call float @llvm.amdgcn.fma.legacy(float %xf, float %yf, float %z)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare float @llvm.amdgcn.fma.legacy(float, float, float)
|
|
@ -29,6 +29,16 @@ define float @test_const(float %x) {
|
||||||
ret float %call
|
ret float %call
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Combine to a normal fmul because one operand (99.5) is a finite, non-zero
; constant; the fast-math flags on the call (here: contract) must be preserved
; on the resulting fmul.
|
||||||
|
define float @test_const_fmf(float %x) {
|
||||||
|
; CHECK-LABEL: @test_const_fmf(
|
||||||
|
; CHECK-NEXT: [[CALL:%.*]] = fmul contract float [[X:%.*]], 9.950000e+01
|
||||||
|
; CHECK-NEXT: ret float [[CALL]]
|
||||||
|
;
|
||||||
|
%call = call contract float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
; Combine to fmul because neither argument can be infinity or NaN.
|
; Combine to fmul because neither argument can be infinity or NaN.
|
||||||
define float @test_finite(i32 %x, i32 %y) {
|
define float @test_finite(i32 %x, i32 %y) {
|
||||||
; CHECK-LABEL: @test_finite(
|
; CHECK-LABEL: @test_finite(
|
||||||
|
|
Loading…
Reference in New Issue