forked from OSchip/llvm-project
Revert "[OpenMP] Lower printf to __llvm_omp_vprintf"
This reverts commit db81d8f6c4
.
This commit is contained in:
parent
dc9edc6a6d
commit
0fa45d6d80
|
@ -5106,16 +5106,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||||
return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
|
return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
|
||||||
}
|
}
|
||||||
case Builtin::BIprintf:
|
case Builtin::BIprintf:
|
||||||
if (getTarget().getTriple().isNVPTX() ||
|
if (getTarget().getTriple().isNVPTX())
|
||||||
getTarget().getTriple().isAMDGCN()) {
|
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
|
||||||
if (getLangOpts().OpenMPIsDevice)
|
if (getTarget().getTriple().getArch() == Triple::amdgcn &&
|
||||||
return EmitOpenMPDevicePrintfCallExpr(E);
|
getLangOpts().HIP)
|
||||||
if (getTarget().getTriple().isNVPTX())
|
return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
|
||||||
return EmitNVPTXDevicePrintfCallExpr(E);
|
|
||||||
if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
|
|
||||||
return EmitAMDGPUDevicePrintfCallExpr(E);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case Builtin::BI__builtin_canonicalize:
|
case Builtin::BI__builtin_canonicalize:
|
||||||
case Builtin::BI__builtin_canonicalizef:
|
case Builtin::BI__builtin_canonicalizef:
|
||||||
|
|
|
@ -21,14 +21,13 @@
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
using namespace CodeGen;
|
using namespace CodeGen;
|
||||||
|
|
||||||
namespace {
|
static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
|
||||||
llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
|
|
||||||
llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
|
llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
|
||||||
llvm::Type::getInt8PtrTy(M.getContext())};
|
llvm::Type::getInt8PtrTy(M.getContext())};
|
||||||
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
|
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
|
||||||
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
|
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
|
||||||
|
|
||||||
if (auto *F = M.getFunction("vprintf")) {
|
if (auto* F = M.getFunction("vprintf")) {
|
||||||
// Our CUDA system header declares vprintf with the right signature, so
|
// Our CUDA system header declares vprintf with the right signature, so
|
||||||
// nobody else should have been able to declare vprintf with a bogus
|
// nobody else should have been able to declare vprintf with a bogus
|
||||||
// signature.
|
// signature.
|
||||||
|
@ -42,28 +41,6 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
|
||||||
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
|
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
|
|
||||||
const char *Name = "__llvm_omp_vprintf";
|
|
||||||
llvm::Module &M = CGM.getModule();
|
|
||||||
llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
|
|
||||||
llvm::Type::getInt8PtrTy(M.getContext()),
|
|
||||||
llvm::Type::getInt32Ty(M.getContext())};
|
|
||||||
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
|
|
||||||
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
|
|
||||||
|
|
||||||
if (auto *F = M.getFunction(Name)) {
|
|
||||||
if (F->getFunctionType() != VprintfFuncType) {
|
|
||||||
CGM.Error(SourceLocation(),
|
|
||||||
"Invalid type declaration for __llvm_omp_vprintf");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
return F;
|
|
||||||
}
|
|
||||||
|
|
||||||
return llvm::Function::Create(
|
|
||||||
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
|
// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
|
||||||
// isn't particularly special; it's invoked just like a regular function).
|
// isn't particularly special; it's invoked just like a regular function).
|
||||||
// vprintf takes two args: A format string, and a pointer to a buffer containing
|
// vprintf takes two args: A format string, and a pointer to a buffer containing
|
||||||
|
@ -90,17 +67,17 @@ llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
|
||||||
// Note that by the time this function runs, E's args have already undergone the
|
// Note that by the time this function runs, E's args have already undergone the
|
||||||
// standard C vararg promotion (short -> int, float -> double, etc.).
|
// standard C vararg promotion (short -> int, float -> double, etc.).
|
||||||
|
|
||||||
std::pair<llvm::Value *, llvm::TypeSize>
|
namespace {
|
||||||
packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
|
llvm::Value *packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF,
|
||||||
|
const CallArgList &Args) {
|
||||||
const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
|
const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
|
||||||
llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
|
llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
|
||||||
CGBuilderTy &Builder = CGF->Builder;
|
CGBuilderTy &Builder = CGF->Builder;
|
||||||
|
|
||||||
// Construct and fill the args buffer that we'll pass to vprintf.
|
// Construct and fill the args buffer that we'll pass to vprintf.
|
||||||
if (Args.size() <= 1) {
|
if (Args.size() <= 1) {
|
||||||
// If there are no args, pass a null pointer and size 0
|
// If there are no args, pass a null pointer to vprintf.
|
||||||
llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
|
return llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
|
||||||
return {BufferPtr, llvm::TypeSize::Fixed(0)};
|
|
||||||
} else {
|
} else {
|
||||||
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
|
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
|
||||||
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
|
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
|
||||||
|
@ -119,64 +96,43 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
|
||||||
llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
|
llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
|
||||||
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
|
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
|
||||||
}
|
}
|
||||||
llvm::Value *BufferPtr =
|
return Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
|
||||||
Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
|
|
||||||
return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) {
|
|
||||||
return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
|
|
||||||
return !A.getRValue(*CGF).isScalar();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
|
|
||||||
llvm::Function *Decl, bool WithSizeArg) {
|
|
||||||
CodeGenModule &CGM = CGF->CGM;
|
|
||||||
CGBuilderTy &Builder = CGF->Builder;
|
|
||||||
assert(E->getBuiltinCallee() == Builtin::BIprintf);
|
|
||||||
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
|
|
||||||
|
|
||||||
// Uses the same format as nvptx for the argument packing, but also passes
|
|
||||||
// an i32 for the total size of the passed pointer
|
|
||||||
CallArgList Args;
|
|
||||||
CGF->EmitCallArgs(Args,
|
|
||||||
E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
|
|
||||||
E->arguments(), E->getDirectCallee(),
|
|
||||||
/* ParamsToSkip = */ 0);
|
|
||||||
|
|
||||||
// We don't know how to emit non-scalar varargs.
|
|
||||||
if (containsNonScalarVarargs(CGF, Args)) {
|
|
||||||
CGM.ErrorUnsupported(E, "non-scalar arg to printf");
|
|
||||||
return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
|
|
||||||
llvm::Value *BufferPtr = r.first;
|
|
||||||
|
|
||||||
llvm::SmallVector<llvm::Value *, 3> Vec = {
|
|
||||||
Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
|
|
||||||
if (WithSizeArg) {
|
|
||||||
// Passing > 32bit of data as a local alloca doesn't work for nvptx or
|
|
||||||
// amdgpu
|
|
||||||
llvm::Constant *Size =
|
|
||||||
llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
|
|
||||||
static_cast<uint32_t>(r.second.getFixedSize()));
|
|
||||||
|
|
||||||
Vec.push_back(Size);
|
|
||||||
}
|
|
||||||
return RValue::get(Builder.CreateCall(Decl, Vec));
|
|
||||||
}
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
|
RValue
|
||||||
|
CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
|
||||||
|
ReturnValueSlot ReturnValue) {
|
||||||
assert(getTarget().getTriple().isNVPTX());
|
assert(getTarget().getTriple().isNVPTX());
|
||||||
return EmitDevicePrintfCallExpr(
|
assert(E->getBuiltinCallee() == Builtin::BIprintf);
|
||||||
E, this, GetVprintfDeclaration(CGM.getModule()), false);
|
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
|
||||||
|
|
||||||
|
CallArgList Args;
|
||||||
|
EmitCallArgs(Args,
|
||||||
|
E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
|
||||||
|
E->arguments(), E->getDirectCallee(),
|
||||||
|
/* ParamsToSkip = */ 0);
|
||||||
|
|
||||||
|
// We don't know how to emit non-scalar varargs.
|
||||||
|
if (llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
|
||||||
|
return !A.getRValue(*this).isScalar();
|
||||||
|
})) {
|
||||||
|
CGM.ErrorUnsupported(E, "non-scalar arg to printf");
|
||||||
|
return RValue::get(llvm::ConstantInt::get(IntTy, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::Value *BufferPtr = packArgsIntoNVPTXFormatBuffer(this, Args);
|
||||||
|
|
||||||
|
// Invoke vprintf and return.
|
||||||
|
llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
|
||||||
|
return RValue::get(Builder.CreateCall(
|
||||||
|
VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
|
||||||
}
|
}
|
||||||
|
|
||||||
RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
|
RValue
|
||||||
|
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
|
||||||
|
ReturnValueSlot ReturnValue) {
|
||||||
assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
|
assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
|
||||||
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
|
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
|
||||||
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
|
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
|
||||||
|
@ -206,10 +162,3 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
|
||||||
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
|
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
|
||||||
return RValue::get(Printf);
|
return RValue::get(Printf);
|
||||||
}
|
}
|
||||||
|
|
||||||
RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
|
|
||||||
assert(getTarget().getTriple().isNVPTX() ||
|
|
||||||
getTarget().getTriple().isAMDGCN());
|
|
||||||
return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
|
|
||||||
true);
|
|
||||||
}
|
|
||||||
|
|
|
@ -4070,9 +4070,10 @@ public:
|
||||||
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
|
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
|
||||||
ReturnValueSlot ReturnValue);
|
ReturnValueSlot ReturnValue);
|
||||||
|
|
||||||
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
|
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
|
||||||
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
|
ReturnValueSlot ReturnValue);
|
||||||
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);
|
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
|
||||||
|
ReturnValueSlot ReturnValue);
|
||||||
|
|
||||||
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
|
||||||
const CallExpr *E, ReturnValueSlot ReturnValue);
|
const CallExpr *E, ReturnValueSlot ReturnValue);
|
||||||
|
|
|
@ -34,15 +34,23 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
|
||||||
///}
|
///}
|
||||||
|
|
||||||
/// Print
|
/// Print
|
||||||
/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
|
/// TODO: For now we have to use macros to guard the code because Clang lowers
|
||||||
|
/// `printf` to different function calls on NVPTX and AMDGCN platforms, and it
|
||||||
|
/// doesn't work for AMDGCN. After it can work on AMDGCN, we will remove the
|
||||||
|
/// macro.
|
||||||
/// {
|
/// {
|
||||||
|
|
||||||
|
#ifndef __AMDGCN__
|
||||||
extern "C" {
|
extern "C" {
|
||||||
int printf(const char *format, ...);
|
int printf(const char *format, ...);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PRINTF(fmt, ...) (void)printf(fmt, ##__VA_ARGS__);
|
#define PRINTF(fmt, ...) (void)printf(fmt, __VA_ARGS__);
|
||||||
#define PRINT(str) PRINTF("%s", str)
|
#define PRINT(str) PRINTF("%s", str)
|
||||||
|
#else
|
||||||
|
#define PRINTF(fmt, ...)
|
||||||
|
#define PRINT(str)
|
||||||
|
#endif
|
||||||
|
|
||||||
///}
|
///}
|
||||||
|
|
||||||
|
|
|
@ -29,29 +29,6 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
|
||||||
assertion);
|
assertion);
|
||||||
__builtin_trap();
|
__builtin_trap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp begin declare variant match( \
|
|
||||||
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
|
||||||
int32_t vprintf(const char *, void *);
|
|
||||||
namespace impl {
|
|
||||||
static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
|
|
||||||
return vprintf(Format, Arguments);
|
|
||||||
}
|
|
||||||
} // namespace impl
|
|
||||||
#pragma omp end declare variant
|
|
||||||
|
|
||||||
// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
|
|
||||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
|
||||||
namespace impl {
|
|
||||||
static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
} // namespace impl
|
|
||||||
#pragma omp end declare variant
|
|
||||||
|
|
||||||
int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
|
|
||||||
return impl::omp_vprintf(Format, Arguments, Size);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Current indentation level for the function trace. Only accessed by thread 0.
|
/// Current indentation level for the function trace. Only accessed by thread 0.
|
||||||
|
|
|
@ -184,11 +184,6 @@ __attribute__((weak)) EXTERN void *__kmpc_impl_malloc(size_t) {
|
||||||
}
|
}
|
||||||
__attribute__((weak)) EXTERN void __kmpc_impl_free(void *) {}
|
__attribute__((weak)) EXTERN void __kmpc_impl_free(void *) {}
|
||||||
|
|
||||||
EXTERN
|
|
||||||
int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
|
EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
|
||||||
lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
|
lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
|
||||||
hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
|
hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
|
||||||
|
|
|
@ -184,15 +184,9 @@ EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void *malloc(size_t);
|
void *malloc(size_t);
|
||||||
void free(void *);
|
void free(void *);
|
||||||
int32_t vprintf(const char *, void *);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EXTERN void *__kmpc_impl_malloc(size_t x) { return malloc(x); }
|
EXTERN void *__kmpc_impl_malloc(size_t x) { return malloc(x); }
|
||||||
EXTERN void __kmpc_impl_free(void *x) { free(x); }
|
EXTERN void __kmpc_impl_free(void *x) { free(x); }
|
||||||
|
|
||||||
EXTERN int32_t __llvm_omp_vprintf(const char *Format, void *Arguments,
|
|
||||||
uint32_t) {
|
|
||||||
return vprintf(Format, Arguments);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma omp end declare target
|
#pragma omp end declare target
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// RUN: %libomptarget-compile-generic -fopenmp-extensions
|
// RUN: %libomptarget-compile-generic -fopenmp-extensions
|
||||||
// RUN: %libomptarget-run-generic | %fcheck-generic -strict-whitespace
|
// RUN: %libomptarget-run-generic | %fcheck-generic -strict-whitespace
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
// RUN: %libomptarget-compile-run-and-check-generic
|
// RUN: %libomptarget-compile-run-and-check-generic
|
||||||
|
|
||||||
|
// amdgcn does not have printf definition
|
||||||
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
|
// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
|
||||||
|
|
||||||
// Wrong results on amdgpu
|
// Wrong results on amdgcn
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||||
|
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
// RUN: %libomptarget-compilexx-and-run-generic
|
// RUN: %libomptarget-compilexx-and-run-generic
|
||||||
|
|
||||||
|
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||||
|
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
// RUN: %libomptarget-compile-run-and-check-generic
|
// RUN: %libomptarget-compile-run-and-check-generic
|
||||||
|
|
||||||
// amdgpu does not have a working printf definition
|
// amdgcn does not have printf definition
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
// XFAIL: nvptx64-nvidia-cuda
|
// XFAIL: nvptx64-nvidia-cuda
|
||||||
// XFAIL: nvptx64-nvidia-cuda-newRTL
|
// XFAIL: nvptx64-nvidia-cuda-newRTL
|
||||||
|
|
||||||
// Fails on amdgpu with error: GPU Memory Error
|
// Fails on amdgcn with error: GPU Memory Error
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
// REQUIRES: unified_shared_memory
|
// REQUIRES: unified_shared_memory
|
||||||
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
|
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
|
||||||
|
|
||||||
// Fails on amdgpu with error: GPU Memory Error
|
// Fails on amdgcn with error: GPU Memory Error
|
||||||
// XFAIL: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,9 @@
|
||||||
// REQUIRES: unified_shared_memory
|
// REQUIRES: unified_shared_memory
|
||||||
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
|
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
|
||||||
|
|
||||||
// amdgpu runtime crash
|
// amdgcn does not have printf definition
|
||||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
|
|
||||||
// REQUIRES: unified_shared_memory
|
// REQUIRES: unified_shared_memory
|
||||||
|
|
||||||
// amdgpu runtime crash
|
// amdgcn does not have printf definition
|
||||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
// XFAIL: amdgcn-amd-amdhsa
|
||||||
|
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
|
|
Loading…
Reference in New Issue