GlobalISel: Use the original flags when lowering fneg to fsub

This was ignoring the flags on the fneg itself and instead using the flags
of the instruction defining its source operand. Also fixes tests missing from r358702.

Note the expansion itself isn't correct without nnan, but that should
be fixed separately.

llvm-svn: 363637
This commit is contained in:
Matt Arsenault 2019-06-17 23:48:43 +00:00
parent d57f7cc15e
commit 5a321b899e
4 changed files with 108 additions and 2 deletions

View File

@ -1463,9 +1463,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
unsigned SubByReg = MI.getOperand(1).getReg(); unsigned SubByReg = MI.getOperand(1).getReg();
unsigned ZeroReg = Zero->getOperand(0).getReg(); unsigned ZeroReg = Zero->getOperand(0).getReg();
MachineInstr *SrcMI = MRI.getVRegDef(SubByReg);
MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
SrcMI->getFlags()); MI.getFlags());
MI.eraseFromParent(); MI.eraseFromParent();
return Legalized; return Legalized;
} }

View File

@ -0,0 +1,31 @@
; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
; Check flags are preserved for a regular instruction.
; The nnan flag written on the IR fadd below is expected on the G_FADD
; printed after the IRTranslator.
; CHECK-LABEL: name: fadd_nnan
; CHECK: nnan G_FADD
define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) {
%res = fadd nnan float %arg0, %arg1
store float %res, float addrspace(1)* undef
ret void
}
; Check flags are preserved for a specially handled intrinsic.
; The call is marked `fast`, so the resulting G_FMA is expected to carry
; every individual fast-math flag listed in the check line below.
; CHECK-LABEL: name: fma_fast
; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA
define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) {
%res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2)
store float %res, float addrspace(1)* undef
ret void
}
; Check flags are preserved for an arbitrary target intrinsic.
; The nsz flag on the call is expected on the G_INTRINSIC that the
; llvm.amdgcn.rcp call is translated to.
; CHECK-LABEL: name: rcp_nsz
; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32)
define amdgpu_kernel void @rcp_nsz(float %arg0) {
%res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0)
store float %res, float addrspace(1)* undef
ret void
}
declare float @llvm.fma.f32(float, float, float)
declare float @llvm.amdgcn.rcp.f32(float)

View File

@ -59,6 +59,36 @@ body: |
$vgpr0_vgpr1 = COPY %2 $vgpr0_vgpr1 = COPY %2
... ...
---
# s64 G_FSUB carrying fast-math flags (nnan nsz). For all three check prefixes
# the G_FSUB is expected to be replaced by a G_FNEG of the second operand
# followed by a G_FADD; the flags are expected on the G_FADD, while the
# G_FNEG is expected without any flags.
name: test_fsub_s64_fmf
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; SI-LABEL: name: test_fsub_s64_fmf
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
; SI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
; SI: $vgpr0_vgpr1 = COPY %2(s64)
; VI-LABEL: name: test_fsub_s64_fmf
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
; VI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
; VI: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-LABEL: name: test_fsub_s64_fmf
; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
; GFX9: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = nnan nsz G_FSUB %0, %1
$vgpr0_vgpr1 = COPY %2
...
--- ---
name: test_fsub_s16 name: test_fsub_s16
body: | body: |

View File

@ -729,4 +729,50 @@ TEST_F(GISelMITest, FewerElementsPhi) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
} }
// Lowering G_FNEG to G_FSUB: the emitted G_FSUB must carry the flags of the
// G_FNEG being lowered, not the flags of the instruction feeding it.
TEST_F(GISelMITest, LowerFNEG) {
  // Nothing to run when the fixture has no TM (presumably no target available).
  if (!TM)
    return;

  // Legalizer rules used by this test: only s64 G_FSUB is declared legal.
  DefineLegalizerInfo(A, {
    getActionDefinitionsBuilder(G_FSUB).legalFor({s64});
  });

  const LLT S64 = LLT::scalar(64);

  // Producer instruction tagged with nsz; it feeds the first negation.
  auto AddMI = B.buildInstr(TargetOpcode::G_FADD, {S64},
                            {Copies[0], Copies[1]},
                            MachineInstr::MIFlag::FmNsz);

  // Negation of the add result, tagged with arcp only. The producer's nsz
  // must not leak into the lowered instruction.
  auto NegOfAdd = B.buildInstr(TargetOpcode::G_FNEG, {S64},
                               {AddMI.getReg(0)},
                               MachineInstr::MIFlag::FmArcp);

  // Negation of a plain copy, tagged with ninf; that single flag must survive.
  auto NegOfCopy = B.buildInstr(TargetOpcode::G_FNEG, {S64}, {Copies[0]},
                                MachineInstr::MIFlag::FmNoInfs);

  // The helper is created only after all instructions have been built.
  AInfo LegalInfo(MF->getSubtarget());
  DummyGISelObserver Obs;
  LegalizerHelper Lowering(*MF, LegalInfo, Obs, B);

  // Lower both negations, in the order in which they were created.
  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
            Lowering.lower(*NegOfAdd, 0, S64));
  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
            Lowering.lower(*NegOfCopy, 0, S64));

  // Expected output: each G_FSUB subtracts from a -0.0 constant and carries
  // exactly the flag that was on the corresponding G_FNEG.
  const char *ExpectedMIR = R"(
CHECK: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD %0:_, %1:_
CHECK: [[CONST0:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
CHECK: [[FSUB0:%[0-9]+]]:_(s64) = arcp G_FSUB [[CONST0]]:_, [[FADD]]:_
CHECK: [[CONST1:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
CHECK: [[FSUB1:%[0-9]+]]:_(s64) = ninf G_FSUB [[CONST1]]:_, %0:_
)";

  // Match the final machine function against the expectations above.
  EXPECT_TRUE(CheckMachineFunction(*MF, ExpectedMIR)) << *MF;
}
} // namespace } // namespace