forked from OSchip/llvm-project
AMDGPU: Check NSZ MI flag when folding omod
I'm not sure which exact combination of nsz flags is OK. I think it is fine as long as the flag is on either the source instruction or the omod multiply. For now, just check it on the omod multiply. llvm-svn: 339513
This commit is contained in:
parent
b5acec1f79
commit
13b0db9285
|
@ -994,9 +994,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
|
||||
// correctly handle signed zeros.
|
||||
//
|
||||
// TODO: Check nsz on instructions when fast math flags are preserved to MI
|
||||
// level.
|
||||
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
|
||||
bool IsIEEEMode = ST->enableIEEEBit(MF);
|
||||
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
|
||||
|
||||
for (MachineBasicBlock *MBB : depth_first(&MF)) {
|
||||
MachineBasicBlock::iterator I, Next;
|
||||
|
@ -1007,7 +1006,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||
tryFoldInst(TII, &MI);
|
||||
|
||||
if (!TII->isFoldableCopy(MI)) {
|
||||
if (IsIEEEMode || !tryFoldOMod(MI))
|
||||
// TODO: Omod might be OK if there is NSZ only on the source
|
||||
// instruction, and not the omod multiply.
|
||||
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
|
||||
!tryFoldOMod(MI))
|
||||
tryFoldClamp(MI);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
--- |
|
||||
define amdgpu_ps void @omod_inst_flag_nsz_src() {
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_ps void @omod_inst_flag_nsz_result() {
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_ps void @omod_inst_flag_nsz_both() {
|
||||
unreachable
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# FIXME: Is it OK to fold omod for this?
|
||||
# GCN-LABEL: name: omod_inst_flag_nsz_src
|
||||
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||
# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM implicit %1
|
||||
name: omod_inst_flag_nsz_src
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||
%1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||
S_ENDPGM implicit %1
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: omod_inst_flag_nsz_result
|
||||
# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM implicit %0
|
||||
|
||||
name: omod_inst_flag_nsz_result
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||
S_ENDPGM implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: omod_inst_flag_nsz_both
|
||||
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM implicit %0
|
||||
|
||||
name: omod_inst_flag_nsz_both
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||
S_ENDPGM implicit %1
|
||||
...
|
Loading…
Reference in New Issue