[Remarks] [AMDGPU] Emit optimization remarks for atomics generating hardware instructions

Produce remarks when atomic instructions are expanded into hardware instructions
in SIISelLowering.cpp. Currently, these remarks are only emitted for atomic fadd
instructions.

Differential Revision: https://reviews.llvm.org/D108150
This commit is contained in:
Anshil Gandhi 2021-08-19 20:50:36 -06:00
parent 9d4faa8ac3
commit 508b06699a
6 changed files with 169 additions and 8 deletions

View File

@ -0,0 +1,44 @@
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: -Rpass=si-lower -munsafe-fp-atomics %s -S -emit-llvm -o - 2>&1 | \
// RUN: FileCheck %s --check-prefix=GFX90A-HW
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: -Rpass=si-lower -munsafe-fp-atomics %s -S -o - 2>&1 | \
// RUN: FileCheck %s --check-prefix=GFX90A-HW-REMARK
// REQUIRES: amdgpu-registered-target
// Memory-order constants passed to the __opencl_atomic_* builtin used in this
// test; each one is an alias for the corresponding predefined __ATOMIC_* macro.
typedef enum memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
// Memory-scope constants passed to the __opencl_atomic_* builtin used in this
// test; each one is an alias for a predefined __OPENCL_MEMORY_SCOPE_* macro.
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
// The sub-group scope only exists when a subgroup extension macro is defined.
// NOTE(review): atomic_unsafe_hw uses memory_scope_sub_group unconditionally,
// so this test presumably relies on one of these macros being defined for the
// RUN-line configuration -- confirm.
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower]
// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower]
// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request. [-Rpass=si-lower]
// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
// GFX90A-HW-LABEL: @atomic_unsafe_hw
// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("agent-one-as") monotonic, align 4
// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
// Performs three relaxed atomic float fetch-adds of `a` into `*d`, one each at
// work-group, device and sub-group memory scope. The statement order matches
// the order of the expected remarks and atomicrmw instructions checked above.
// The fetched values are stored in locals that are not otherwise read.
void atomic_unsafe_hw(__global atomic_float *d, float a) {
float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device);
float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group);
}

View File

@ -610,7 +610,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
: SSNs[AI->getSyncScopeID()];
OptimizationRemarkEmitter ORE(AI->getFunction());
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction())
return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
<< "A compare and swap loop was generated for an atomic "
<< AI->getOperationName(AI->getOperation()) << " operation at "
<< MemScope << " memory scope";

View File

@ -19,6 +19,7 @@
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@ -12129,6 +12130,25 @@ static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
LLVMContext &Ctx = RMW->getFunction()->getContext();
SmallVector<StringRef> SSNs;
Ctx.getSyncScopeNames(SSNs);
auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
? "system"
: SSNs[RMW->getSyncScopeID()];
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
<< "Hardware instruction generated for atomic "
<< RMW->getOperationName(RMW->getOperation())
<< " operation at memory scope " << MemScope
<< " due to an unsafe request.";
});
return Kind;
};
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
@ -12163,13 +12183,13 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::None;
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
if (AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
return RMW->use_empty() ? AtomicExpansionKind::None
return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
@ -12180,11 +12200,13 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (!Ty->isDoubleTy())
return AtomicExpansionKind::None;
return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() == "true")
? AtomicExpansionKind::None
if (fpModeMatchesGlobalFPAtomicMode(RMW))
return AtomicExpansionKind::None;
return RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() == "true"
? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}

View File

@ -0,0 +1,95 @@
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=si-lower \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-HW
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope system due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope agent due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope workgroup due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope wavefront due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope singlethread due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request.
; GFX90A-HW: Hardware instruction generated for atomic fadd operation at memory scope singlethread-one-as due to an unsafe request.
; GFX90A-HW-LABEL: atomic_add_unsafe_hw:
; GFX90A-HW: ds_add_f64 v2, v[0:1]
; GFX90A-HW: s_endpgm
; seq_cst fadd of the constant 4.0 to a double in addrspace(3) with no explicit
; syncscope; the checks above expect the "system" scope remark and ds_add_f64.
define amdgpu_kernel void @atomic_add_unsafe_hw(double addrspace(3)* %ptr) #0 {
main_body:
%ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_agent:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at syncscope("agent");
; the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_agent(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("agent") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wg:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at syncscope("workgroup");
; the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_wg(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("workgroup") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wavefront:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at syncscope("wavefront");
; the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_wavefront(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("wavefront") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_single_thread:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at
; syncscope("singlethread"); the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_single_thread(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("singlethread") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_aoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at
; syncscope("agent-one-as"); the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_aoa(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("agent-one-as") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wgoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at
; syncscope("workgroup-one-as"); the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_wgoa(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("workgroup-one-as") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wfoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at
; syncscope("wavefront-one-as"); the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_wfoa(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("wavefront-one-as") monotonic, align 4
ret void
}
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_stoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[2:3]
; GFX90A-HW: s_endpgm
; Monotonic fadd of %val to a float in addrspace(1) at
; syncscope("singlethread-one-as"); the result %ret is unused.
define amdgpu_kernel void @atomic_add_unsafe_hw_stoa(float addrspace(1)* %ptr, float %val) #0 {
main_body:
%ret = atomicrmw fadd float addrspace(1)* %ptr, float %val syncscope("singlethread-one-as") monotonic, align 4
ret void
}
attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }