forked from OSchip/llvm-project
[AMDGPU] Expose llvm fence instruction as clang intrinsic
Expose the LLVM fence instruction as a Clang builtin for the AMDGPU target: __builtin_amdgcn_fence(unsigned int memoryOrdering, const char *syncScope). The first argument of this builtin is one of the memory-ordering specifiers __ATOMIC_ACQUIRE, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST, following C++11 memory model semantics. It is mapped to the corresponding LLVM atomic memory ordering for the fence instruction using the LLVM atomic C ABI. The second argument is an AMDGPU-specific synchronization scope, given as a string. Reviewed By: sameerds. Differential Revision: https://reviews.llvm.org/D75917
This commit is contained in:
parent
84eff8cef6
commit
06bdffb2bb
|
@ -57,6 +57,7 @@ BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
|
|||
BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
|
||||
|
||||
// FIXME: Need to disallow constant address space.
|
||||
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
|
||||
|
|
|
@ -11992,6 +11992,7 @@ private:
|
|||
bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall);
|
||||
bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
|
||||
bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
|
||||
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
|
||||
|
||||
bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall);
|
||||
bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call);
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "clang/CodeGen/CGFunctionInfo.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/InlineAsm.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
|
@ -14131,6 +14132,43 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
|
|||
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
|
||||
return Builder.CreateCall(F, { Src0, Src1, Src2 });
|
||||
}
|
||||
|
||||
case AMDGPU::BI__builtin_amdgcn_fence: {
|
||||
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
|
||||
llvm::SyncScope::ID SSID;
|
||||
Value *Order = EmitScalarExpr(E->getArg(0));
|
||||
Value *Scope = EmitScalarExpr(E->getArg(1));
|
||||
|
||||
if (isa<llvm::ConstantInt>(Order)) {
|
||||
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
|
||||
|
||||
// Map C11/C++11 memory ordering to LLVM memory ordering
|
||||
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
|
||||
case llvm::AtomicOrderingCABI::acquire:
|
||||
AO = llvm::AtomicOrdering::Acquire;
|
||||
break;
|
||||
case llvm::AtomicOrderingCABI::release:
|
||||
AO = llvm::AtomicOrdering::Release;
|
||||
break;
|
||||
case llvm::AtomicOrderingCABI::acq_rel:
|
||||
AO = llvm::AtomicOrdering::AcquireRelease;
|
||||
break;
|
||||
case llvm::AtomicOrderingCABI::seq_cst:
|
||||
AO = llvm::AtomicOrdering::SequentiallyConsistent;
|
||||
break;
|
||||
case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
|
||||
case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
|
||||
break;
|
||||
}
|
||||
|
||||
StringRef scp;
|
||||
llvm::getConstantStringInfo(Scope, scp);
|
||||
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
|
||||
|
||||
return Builder.CreateFence(AO, SSID);
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
}
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -1920,6 +1920,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
|
|||
if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
|
||||
return ExprError();
|
||||
break;
|
||||
case llvm::Triple::amdgcn:
|
||||
if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
|
||||
return ExprError();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3033,6 +3037,46 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
|
||||
}
|
||||
|
||||
bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
|
||||
CallExpr *TheCall) {
|
||||
switch (BuiltinID) {
|
||||
case AMDGPU::BI__builtin_amdgcn_fence: {
|
||||
ExprResult Arg = TheCall->getArg(0);
|
||||
auto ArgExpr = Arg.get();
|
||||
Expr::EvalResult ArgResult;
|
||||
|
||||
if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
|
||||
return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
|
||||
<< ArgExpr->getType();
|
||||
int ord = ArgResult.Val.getInt().getZExtValue();
|
||||
|
||||
// Check valididty of memory ordering as per C11 / C++11's memody model.
|
||||
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
|
||||
case llvm::AtomicOrderingCABI::acquire:
|
||||
case llvm::AtomicOrderingCABI::release:
|
||||
case llvm::AtomicOrderingCABI::acq_rel:
|
||||
case llvm::AtomicOrderingCABI::seq_cst:
|
||||
break;
|
||||
default: {
|
||||
return Diag(ArgExpr->getBeginLoc(),
|
||||
diag::warn_atomic_op_has_invalid_memory_order)
|
||||
<< ArgExpr->getSourceRange();
|
||||
}
|
||||
}
|
||||
|
||||
Arg = TheCall->getArg(1);
|
||||
ArgExpr = Arg.get();
|
||||
Expr::EvalResult ArgResult1;
|
||||
// Check that sync scope is a constant literal
|
||||
if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
|
||||
Context))
|
||||
return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
|
||||
<< ArgExpr->getType();
|
||||
} break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
|
||||
CallExpr *TheCall) {
|
||||
if (BuiltinID == SystemZ::BI__builtin_tabort) {
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
|
||||
// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
|
||||
|
||||
// Positive code-generation test: each __builtin_amdgcn_fence call below is
// paired with the LLVM fence instruction that the FileCheck directive
// directly above it requires in the emitted IR.
void test_memory_fence_success() {
|
||||
// CHECK-LABEL: test_memory_fence_success
|
||||
|
||||
// CHECK: fence syncscope("workgroup") seq_cst
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
|
||||
|
||||
// CHECK: fence syncscope("agent") acquire
|
||||
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
|
||||
|
||||
// An empty scope string is required to produce a fence with no syncscope.
// CHECK: fence seq_cst
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
|
||||
|
||||
// The ordering may also be passed as a raw integer: 4 must yield acq_rel.
// CHECK: fence syncscope("agent") acq_rel
|
||||
__builtin_amdgcn_fence(4, "agent");
|
||||
|
||||
// Raw integer 3 must yield release.
// CHECK: fence syncscope("workgroup") release
|
||||
__builtin_amdgcn_fence(3, "workgroup");
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: not %clang_cc1 %s -S \
|
||||
// RUN: -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
|
||||
|
||||
// Negative test: the scope string "foobar" is passed to the fence builtin and
// the compilation (run under `not`) must report the error matched below.
void test_amdgcn_fence_failure() {
|
||||
|
||||
// CHECK: error: Unsupported atomic synchronization scope
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
|
||||
}
|
|
@ -128,3 +128,14 @@ void test_ds_fmaxf(local float *out, float src, int a) {
|
|||
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
|
||||
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
|
||||
}
|
||||
|
||||
// Diagnostic coverage for __builtin_amdgcn_fence: every call below carries,
// on its own line, the annotation for the diagnostic it is meant to trigger
// (out-of-range ordering values, wrong argument counts, wrong argument types,
// and a scope argument that is not written as a literal).
void test_fence() {
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
|
||||
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
|
||||
__builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
|
||||
__builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
|
||||
__builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
|
||||
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
|
||||
// The scope is supplied through a named array rather than a literal at the
// call site, which the next call is required to reject.
const char ptr[] = "workgroup";
|
||||
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue