forked from OSchip/llvm-project
[AMDGPU][clang] Fix __builtin_nontemporal_store() failure on AMDGPU
Reviewed By: yaxunl, sameerds
Differential Revision: https://reviews.llvm.org/D114849
This commit is contained in:
parent
f13b43d570
commit
16b781e6d1
|
@ -170,8 +170,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
|
|||
|
||||
// Convert the type of the pointer to a pointer to the stored type.
|
||||
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
|
||||
unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
|
||||
Value *BC = CGF.Builder.CreateBitCast(
|
||||
Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
|
||||
Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
|
||||
LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
|
||||
LV.setNontemporal(true);
|
||||
CGF.EmitStoreOfScalar(Val, LV, false);
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
||||
// CHECK-LABEL: @test_non_temporal_store_kernel
|
||||
// CHECK: store i32 0, i32 addrspace(1)* %{{.*}}, align 4, !tbaa !{{.*}}, !nontemporal {{.*}}
|
||||
|
||||
// Kernel under test: performs a single nontemporal store of the constant 0
// through `io`, a pointer qualified with the OpenCL `global` address space.
// The CHECK line in this test expects the store to be emitted against an
// `i32 addrspace(1)*` operand and to carry `!nontemporal` metadata.
kernel void test_non_temporal_store_kernel(global unsigned int* io) {
|
||||
// Arguments are (value, destination pointer): 0 is written through `io`.
__builtin_nontemporal_store(0, io);
|
||||
}
|
Loading…
Reference in New Issue