Allow prefetching from non-zero address spaces

Summary:
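This is useful for targets that have prefetch instructions for non-default address spaces. To support them, the `llvm.prefetch` intrinsic is now overloaded on its pointer argument type, so its name is mangled with the pointer type (e.g. `llvm.prefetch.p0i8` for address space 0, `llvm.prefetch.p1i8` for address space 1). Existing IR that uses the unmangled `llvm.prefetch` name is auto-upgraded to the mangled form.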

<rdar://problem/42662136>

Subscribers: nemanjai, javed.absar, hiraditya, kbarton, jkorous, dexonsmith, cfe-commits, llvm-commits, RKSimon, hfinkel, t.p.northover, craig.topper, anemet

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D65254

llvm-svn: 367032
This commit is contained in:
JF Bastien 2019-07-25 16:11:57 +00:00
parent eb3c1ca896
commit dbc0a5df8d
23 changed files with 92 additions and 58 deletions

View File

@ -2133,7 +2133,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
llvm::ConstantInt::get(Int32Ty, 3);
Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
}
case Builtin::BI__builtin_readcyclecounter: {
@ -6021,7 +6021,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// Locality is not supported on ARM target
Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
}
@ -6960,7 +6960,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
// PLDL3STRM or PLDL2STRM.
Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
}
@ -10037,7 +10037,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
Value *Data = ConstantInt::get(Int32Ty, 1);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, Data});
}
case X86::BI_mm_clflush: {

View File

@ -5375,8 +5375,8 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
QualType DeclType = FDecl->getType();
const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(DeclType);
if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) ||
!FT || FT->isVariadic() || ArgExprs.size() != FT->getNumParams())
if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT ||
ArgExprs.size() < FT->getNumParams())
return nullptr;
bool NeedsNewDecl = false;
@ -5415,6 +5415,7 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
return nullptr;
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = FT->isVariadic();
QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
OverloadParams, EPI);
DeclContext *Parent = FDecl->getParent();

View File

@ -88,28 +88,28 @@ void test_swp(uint32_t x, volatile void *p) {
/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
// ARM-LABEL: test_pld
// ARM: call void @llvm.prefetch(i8* null, i32 0, i32 3, i32 1)
// ARM: call void @llvm.prefetch.p0i8(i8* null, i32 0, i32 3, i32 1)
void test_pld() {
__pld(0);
}
// ARM-LABEL: test_pldx
// AArch32: call void @llvm.prefetch(i8* null, i32 1, i32 3, i32 1)
// AArch64: call void @llvm.prefetch(i8* null, i32 1, i32 1, i32 1)
// AArch32: call void @llvm.prefetch.p0i8(i8* null, i32 1, i32 3, i32 1)
// AArch64: call void @llvm.prefetch.p0i8(i8* null, i32 1, i32 1, i32 1)
void test_pldx() {
__pldx(1, 2, 0, 0);
}
/* 8.6.2 Instruction prefetch */
// ARM-LABEL: test_pli
// ARM: call void @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
// ARM: call void @llvm.prefetch.p0i8(i8* null, i32 0, i32 3, i32 0)
void test_pli() {
__pli(0);
}
// ARM-LABEL: test_plix
// AArch32: call void @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
// AArch64: call void @llvm.prefetch(i8* null, i32 0, i32 1, i32 0)
// AArch32: call void @llvm.prefetch.p0i8(i8* null, i32 0, i32 3, i32 0)
// AArch64: call void @llvm.prefetch.p0i8(i8* null, i32 0, i32 1, i32 0)
void test_plix() {
__plix(2, 0, 0);
}

View File

@ -92,14 +92,13 @@ unsigned rbit(unsigned a) {
void prefetch(int i) {
__builtin_arm_prefetch(&i, 0, 1);
// CHECK: call {{.*}} @llvm.prefetch(i8* %{{.*}}, i32 0, i32 3, i32 1)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* %{{.*}}, i32 0, i32 3, i32 1)
__builtin_arm_prefetch(&i, 1, 1);
// CHECK: call {{.*}} @llvm.prefetch(i8* %{{.*}}, i32 1, i32 3, i32 1)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* %{{.*}}, i32 1, i32 3, i32 1)
__builtin_arm_prefetch(&i, 1, 0);
// CHECK: call {{.*}} @llvm.prefetch(i8* %{{.*}}, i32 1, i32 3, i32 0)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* %{{.*}}, i32 1, i32 3, i32 0)
}
void ldc(const void *i) {

View File

@ -46,16 +46,16 @@ void barriers() {
void prefetch() {
__builtin_arm_prefetch(0, 1, 2, 0, 1); // pstl3keep
// CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 1, i32 1, i32 1)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* null, i32 1, i32 1, i32 1)
__builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1keep
// CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 0, i32 1)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* null, i32 0, i32 0, i32 1)
__builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1strm
// CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 0, i32 1)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* null, i32 0, i32 0, i32 1)
__builtin_arm_prefetch(0, 0, 0, 0, 0); // plil1keep
// CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
// CHECK: call {{.*}} @llvm.prefetch.p0i8(i8* null, i32 0, i32 3, i32 0)
}
int32_t jcvt(double v) {

View File

@ -1435,7 +1435,7 @@ test_prefetch() {
// CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
// CHECK-NEXT: call void @llvm.prefetch(i8* [[REG715]], i32 0, i32 3, i32 1)
// CHECK-NEXT: call void @llvm.prefetch.p0i8(i8* [[REG715]], i32 0, i32 3, i32 1)
// CHECK-NEXT: ret void
void __attribute__((noinline))

View File

@ -34,7 +34,7 @@ void f(void) {
// CHECK: call void @foo()
// CHECK: call i32 @abs(i32 0)
// CHECK: call i8* @strrchr(
// CHECK: call void @llvm.prefetch(
// CHECK: call void @llvm.prefetch.p0i8(
// CHECK: call i8* @memchr(
// CHECK: ret void
@ -42,4 +42,4 @@ void f(void) {
// CHECK: declare i32 @abs(i32
// CHECK: declare i8* @strrchr(i8*, i32)
// CHECK: declare i8* @memchr(
// CHECK: declare void @llvm.prefetch(
// CHECK: declare void @llvm.prefetch.p0i8(

View File

@ -0,0 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm %s -o - | FileCheck %s
void f(int __attribute__((address_space(1))) * a, ...) {
__builtin_prefetch(a, 0, 1);
// CHECK: call void @llvm.prefetch.p1i8(i8 addrspace(1)* {{%.+}}, i32 0, i32 1, i32 1)
}

View File

@ -5,12 +5,12 @@
void test_m_prefetch(void *p) {
return _m_prefetch(p);
// CHECK-LABEL: define void @test_m_prefetch
// CHECK: call void @llvm.prefetch({{.*}}, i32 0, i32 3, i32 1)
// CHECK-LABEL: define void @test_m_prefetch
// CHECK: call void @llvm.prefetch.p0i8({{.*}}, i32 0, i32 3, i32 1)
}
void test_m_prefetch_w(void *p) {
return _m_prefetchw(p);
// CHECK-LABEL: define void @test_m_prefetch_w
// CHECK: call void @llvm.prefetch({{.*}}, i32 1, i32 3, i32 1)
// CHECK-LABEL: define void @test_m_prefetch_w
// CHECK: call void @llvm.prefetch.p0i8({{.*}}, i32 1, i32 3, i32 1)
}

View File

@ -503,7 +503,7 @@ __m128 test_mm_or_ps(__m128 A, __m128 B) {
void test_mm_prefetch(char const* p) {
// CHECK-LABEL: test_mm_prefetch
// CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1)
// CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 0, i32 0, i32 1)
_mm_prefetch(p, 0);
}

View File

@ -451,7 +451,7 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
// from being reordered overly much with respect to nearby access to the same
// memory while not impeding optimization.
def int_prefetch
: Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
: Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
ImmArg<1>, ImmArg<2>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;

View File

@ -789,6 +789,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
case 'p':
if (Name == "prefetch") {
// Handle address space overloading.
Type *Tys[] = {F->arg_begin()->getType()};
if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
rename(F);
NewFn =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
return true;
}
}
break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;

View File

@ -312,8 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
IRBuilder<> Builder(MemI);
Module *M = BB->getParent()->getParent();
Type *I32 = Type::getInt32Ty(BB->getContext());
Function *PrefetchFunc =
Intrinsic::getDeclaration(M, Intrinsic::prefetch);
Function *PrefetchFunc = Intrinsic::getDeclaration(
M, Intrinsic::prefetch, PrefPtrValue->getType());
Builder.CreateCall(
PrefetchFunc,
{PrefPtrValue,

View File

@ -140,6 +140,21 @@ define void @tests.lifetime.start.end() {
ret void
}
declare void @llvm.prefetch(i8*, i32, i32, i32)
define void @test.prefetch(i8* %ptr) {
; CHECK-LABEL: @test.prefetch(
; CHECK: @llvm.prefetch.p0i8(i8* %ptr, i32 0, i32 3, i32 2)
call void @llvm.prefetch(i8* %ptr, i32 0, i32 3, i32 2)
ret void
}
declare void @llvm.prefetch.p0i8(i8*, i32, i32, i32)
define void @test.prefetch.2(i8* %ptr) {
; CHECK-LABEL: @test.prefetch.2(
; CHECK: @llvm.prefetch.p0i8(i8* %ptr, i32 0, i32 3, i32 2)
call void @llvm.prefetch(i8* %ptr, i32 0, i32 3, i32 2)
ret void
}
; This is part of @test.objectsize(), since llvm.objectsize declaration gets
; emitted at the end.

View File

@ -1133,7 +1133,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)

View File

@ -1164,7 +1164,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)

View File

@ -1319,7 +1319,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)

View File

@ -1390,7 +1390,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)
@ -1588,7 +1588,7 @@ normal:
}
declare void @f.writeonly() writeonly
; CHECK: declare void @f.writeonly() #40
; CHECK: declare void @f.writeonly() #39
; CHECK: attributes #0 = { alignstack=4 }
; CHECK: attributes #1 = { alignstack=8 }
@ -1629,8 +1629,8 @@ declare void @f.writeonly() writeonly
; CHECK: attributes #36 = { argmemonly nounwind readonly }
; CHECK: attributes #37 = { argmemonly nounwind }
; CHECK: attributes #38 = { nounwind readonly }
; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #40 = { writeonly }
; CHECK: attributes #39 = { writeonly }
; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #41 = { builtin }
;; Metadata

View File

@ -1390,7 +1390,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)
@ -1606,7 +1606,7 @@ normal:
declare void @f.writeonly() writeonly
; CHECK: declare void @f.writeonly() #40
; CHECK: declare void @f.writeonly() #39
;; Constant Expressions
@ -1654,8 +1654,8 @@ define i8** @constexpr() {
; CHECK: attributes #36 = { argmemonly nounwind readonly }
; CHECK: attributes #37 = { argmemonly nounwind }
; CHECK: attributes #38 = { nounwind readonly }
; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #40 = { writeonly }
; CHECK: attributes #39 = { writeonly }
; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #41 = { builtin }
;; Metadata

View File

@ -1400,7 +1400,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)
@ -1616,10 +1616,10 @@ normal:
declare void @f.writeonly() writeonly
; CHECK: declare void @f.writeonly() #40
; CHECK: declare void @f.writeonly() #39
declare void @f.speculatable() speculatable
; CHECK: declare void @f.speculatable() #41
; CHECK: declare void @f.speculatable() #40
;; Constant Expressions
@ -1667,9 +1667,9 @@ define i8** @constexpr() {
; CHECK: attributes #36 = { argmemonly nounwind readonly }
; CHECK: attributes #37 = { argmemonly nounwind }
; CHECK: attributes #38 = { nounwind readonly }
; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #40 = { writeonly }
; CHECK: attributes #41 = { speculatable }
; CHECK: attributes #39 = { writeonly }
; CHECK: attributes #40 = { speculatable }
; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #42 = { builtin }
; CHECK: attributes #43 = { strictfp }

View File

@ -1411,7 +1411,7 @@ define void @intrinsics.codegen() {
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)
@ -1627,10 +1627,10 @@ normal:
declare void @f.writeonly() writeonly
; CHECK: declare void @f.writeonly() #40
; CHECK: declare void @f.writeonly() #39
declare void @f.speculatable() speculatable
; CHECK: declare void @f.speculatable() #41
; CHECK: declare void @f.speculatable() #40
;; Constant Expressions
@ -1678,9 +1678,9 @@ define i8** @constexpr() {
; CHECK: attributes #36 = { argmemonly nounwind readonly }
; CHECK: attributes #37 = { argmemonly nounwind }
; CHECK: attributes #38 = { nounwind readonly }
; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #40 = { writeonly }
; CHECK: attributes #41 = { speculatable }
; CHECK: attributes #39 = { writeonly }
; CHECK: attributes #40 = { speculatable }
; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nounwind }
; CHECK: attributes #42 = { builtin }
; CHECK: attributes #43 = { strictfp }

View File

@ -1475,7 +1475,7 @@ declare void @llvm.write_register.i32(metadata, i32)
declare void @llvm.write_register.i64(metadata, i64)
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)
declare void @llvm.prefetch(i8*, i32, i32, i32)
declare void @llvm.prefetch.p0i8(i8*, i32, i32, i32)
declare void @llvm.pcmarker(i32)
declare i64 @llvm.readcyclecounter()
declare void @llvm.clear_cache(i8*, i8*)
@ -1502,8 +1502,8 @@ define void @intrinsics.codegen() {
call void @llvm.stackrestore(i8* %stack)
; CHECK: call void @llvm.stackrestore(i8* %stack)
call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
; CHECK: call void @llvm.prefetch.p0i8(i8* %stack, i32 0, i32 3, i32 0)
call void @llvm.pcmarker(i32 1)
; CHECK: call void @llvm.pcmarker(i32 1)

View File

@ -159,7 +159,7 @@ declare void @llvm.prefetch(i8*, i32, i32, i32)
define void @test_prefetch(i8* %ptr, i32 %arg0, i32 %arg1) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %arg0
; CHECK-NEXT: call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)
; CHECK-NEXT: call void @llvm.prefetch.p0i8(i8* %ptr, i32 %arg0, i32 0, i32 0)
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %arg1
call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)