From 16b781e6d16dead414a7036c8b59f1700ea49251 Mon Sep 17 00:00:00 2001
From: skc7
Date: Wed, 1 Dec 2021 06:12:57 +0000
Subject: [PATCH] [AMDGPU][clang] Fix __builtin_nontemporal_store() failure on AMDGPU

Reviewed By: yaxunl, sameerds

Differential Revision: https://reviews.llvm.org/D114849
---
 clang/lib/CodeGen/CGBuiltin.cpp                       | 3 ++-
 clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 849423c8b9ba..5d6df59cc405 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -170,8 +170,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
 
   // Convert the type of the pointer to a pointer to the stored type.
   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
   Value *BC = CGF.Builder.CreateBitCast(
-      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
   LV.setNontemporal(true);
   CGF.EmitStoreOfScalar(Val, LV, false);
diff --git a/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl b/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl
new file mode 100644
index 000000000000..539d857080e2
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgcn-non-temporal-store.cl
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: @test_non_temporal_store_kernel
+// CHECK: store i32 0, i32 addrspace(1)* %{{.*}}, align 4, !tbaa !{{.*}}, !nontemporal {{.*}}
+
+kernel void test_non_temporal_store_kernel(global unsigned int* io) {
+  __builtin_nontemporal_store(0, io);
+}