diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index e2ba28815a8d..6c57926b7d1a 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -994,9 +994,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // omod is ignored by hardware if IEEE bit is enabled. omod also does not // correctly handle signed zeros. // - // TODO: Check nsz on instructions when fast math flags are preserved to MI - // level. - bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath(); + bool IsIEEEMode = ST->enableIEEEBit(MF); + bool HasNSZ = MFI->hasNoSignedZerosFPMath(); for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; @@ -1007,7 +1006,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { tryFoldInst(TII, &MI); if (!TII->isFoldableCopy(MI)) { - if (IsIEEEMode || !tryFoldOMod(MI)) + // TODO: Omod might be OK if there is NSZ only on the source + // instruction, and not the omod multiply. + if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || + !tryFoldOMod(MI)) tryFoldClamp(MI); continue; } diff --git a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir new file mode 100644 index 000000000000..861cdda8ad6f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -0,0 +1,71 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_ps void @omod_inst_flag_nsz_src() { + unreachable + } + + define amdgpu_ps void @omod_inst_flag_nsz_result() { + unreachable + } + + define amdgpu_ps void @omod_inst_flag_nsz_both() { + unreachable + } + +... + +--- + +# FIXME: Is it OK to fold omod when nsz is only on the source V_ADD and not on the V_MUL? The checks below expect the multiply to be left unfolded. 
+# GCN-LABEL: name: omod_inst_flag_nsz_src +# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec +# GCN-NEXT: S_ENDPGM implicit %1 +name: omod_inst_flag_nsz_src +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec + %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + S_ENDPGM implicit %1 + +... +--- + +# GCN-LABEL: name: omod_inst_flag_nsz_result +# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec +# GCN-NEXT: S_ENDPGM implicit %0 + +name: omod_inst_flag_nsz_result +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec + %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + S_ENDPGM implicit %1 +... + +--- + +# GCN-LABEL: name: omod_inst_flag_nsz_both +# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec +# GCN-NEXT: S_ENDPGM implicit %0 + +name: omod_inst_flag_nsz_both +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec + %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + S_ENDPGM implicit %1 +...