InferAddressSpaces: Support memory intrinsics

llvm-svn: 293587
This commit is contained in:
Matt Arsenault 2017-01-31 01:56:57 +00:00
parent f63f58a28f
commit 6d5a8d48fd
4 changed files with 394 additions and 14 deletions

View File

@ -153,7 +153,15 @@ private:
Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
DenseSet<Value *> *Visited) const;
// Rewrites the intrinsic call II so that its first argument is NewV and its
// callee is the matching overload of the same intrinsic. Returns true if II
// was rewritten and false if the intrinsic is not one of the handled kinds.
// OldV is the value being replaced.
bool rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV, Value *NewV) const;
// For the intrinsics that rewriteIntrinsicOperands can handle, offers the
// first argument of II to the postorder traversal (PostorderStack/Visited)
// as a candidate flat address expression. Other intrinsics are ignored.
void collectRewritableIntrinsicOperands(
IntrinsicInst *II,
std::vector<std::pair<Value *, bool>> *PostorderStack,
DenseSet<Value *> *Visited) const;
// Returns all flat address expressions in function F, ordered in postorder.
std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@ -210,6 +218,47 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
}
}
// Re-targets a supported intrinsic call at a pointer in a new address space.
//
// For the intrinsics listed below, argument 0 of II is set to NewV and the
// callee is replaced by the declaration of the same intrinsic overloaded on
// II's result type and NewV's type. Returns true when II was updated and
// false for every other intrinsic. OldV is not consulted.
//
// TODO: Move logic to TTI?
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
  const Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  default:
    // Not an intrinsic this pass knows how to re-mangle.
    return false;
  case Intrinsic::objectsize:
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
    break;
  }

  // The pointer type is one of the overloaded types of these intrinsics, so
  // a declaration matching the new pointer type is needed in II's module.
  Module *ParentModule = II->getParent()->getParent()->getParent();
  Type *ResultTy = II->getType();
  Type *PtrTy = NewV->getType();
  Function *Remangled =
      Intrinsic::getDeclaration(ParentModule, IID, { ResultTy, PtrTy });

  II->setArgOperand(0, NewV);
  II->setCalledFunction(Remangled);
  return true;
}
// Feeds the pointer argument of a rewritable intrinsic into the postorder
// traversal.
//
// For objectsize and the amdgcn atomic inc/dec intrinsics, argument 0 of II
// is handed to appendsFlatAddressExpressionToPostorderStack together with
// PostorderStack and Visited. Any other intrinsic is left alone.
//
// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
    IntrinsicInst *II,
    std::vector<std::pair<Value *, bool>> *PostorderStack,
    DenseSet<Value *> *Visited) const {
  switch (II->getIntrinsicID()) {
  default:
    return;
  case Intrinsic::objectsize:
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec: {
    Value *PtrArg = II->getArgOperand(0);
    appendsFlatAddressExpressionToPostorderStack(PtrArg, PostorderStack,
                                                 Visited);
    return;
  }
  }
}
// Returns all flat address expressions in function F. The elements are
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
@ -224,7 +273,7 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
}
// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
// ordered in postorder.
std::vector<Value *>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
@ -249,8 +298,15 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
PushPtrOperand(RMW->getPointerOperand());
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
PushPtrOperand(CmpX->getPointerOperand());
else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
// For memset/memcpy/memmove, any pointer operand can be replaced.
PushPtrOperand(MI->getRawDest());
// TODO: Support intrinsics
// Handle 2nd operand for memcpy/memmove.
if (auto *MTI = dyn_cast<MemTransferInst>(MI))
PushPtrOperand(MTI->getRawSource());
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
}
std::vector<Value *> Postorder; // The resultant postorder.
@ -560,6 +616,63 @@ static bool isSimplePointerUseValidToReplace(Use &U) {
return false;
}
/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
///
/// A replacement memset/memcpy/memmove is created at \p MI's position with
/// \p NewV substituted for \p OldV, carrying over the !tbaa, !alias.scope and
/// !noalias metadata (plus !tbaa.struct for memcpy). \p MI is then erased, so
/// the caller must not use it afterwards.
///
/// The replacement is always emitted as non-volatile, so \p MI must not be
/// volatile; callers are expected to filter volatile intrinsics out first.
///
/// \returns true; every supported memory intrinsic is rewritten.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI,
                                     Value *OldV, Value *NewV) {
  // Volatility is not propagated to the replacement below, so rewriting a
  // volatile intrinsic here would silently drop it.
  assert(!MI->isVolatile() &&
         "volatile memory intrinsics must not be rewritten");

  IRBuilder<> B(MI);
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);

  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    // The destination is the only pointer passed on to the new memset, so
    // NewV takes its place directly.
    B.CreateMemSet(NewV, MSI->getValue(),
                   MSI->getLength(), MSI->getAlignment(),
                   false, // isVolatile
                   TBAA, ScopeMD, NoAliasMD);
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy: both operands may be
    // OldV, in which case both have to be replaced.
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (isa<MemCpyInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpy(Dest, Src, MTI->getLength(),
                     MTI->getAlignment(),
                     false, // isVolatile
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dest, Src, MTI->getLength(),
                      MTI->getAlignment(),
                      false, // isVolatile
                      TBAA, ScopeMD, NoAliasMD);
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic");

  MI->eraseFromParent();
  return true;
}
/// Advances \p I past every consecutive use that belongs to the same user as
/// the use \p I currently refers to. Returns the first use with a different
/// user, or \p End if there is none. \p I must not be equal to \p End on
/// entry, since it is dereferenced unconditionally.
static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  User *const SkippedUser = I->getUser();
  do {
    ++I;
  } while (I != End && I->getUser() == SkippedUser);
  return I;
}
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
const std::vector<Value *> &Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
@ -595,20 +708,38 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
if (NewV == nullptr)
continue;
SmallVector<Use *, 4> Uses;
for (Use &U : V->uses())
Uses.push_back(&U);
DEBUG(dbgs() << "Replacing the uses of " << *V
<< "\n with\n " << *NewV << '\n');
for (Use *U : Uses) {
if (isSimplePointerUseValidToReplace(*U)) {
Value::use_iterator I, E, Next;
for (I = V->use_begin(), E = V->use_end(); I != E; ) {
Use &U = *I;
// Some users may see the same pointer operand in multiple operands. Skip
// to the next instruction.
I = skipToNextUser(I, E);
if (isSimplePointerUseValidToReplace(U)) {
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
U->set(NewV);
} else if (isa<Instruction>(U->getUser())) {
U.set(NewV);
continue;
}
User *CurUser = U.getUser();
// Handle more complex cases like intrinsic that need to be remangled.
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
continue;
}
if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
if (rewriteIntrinsicOperands(II, V, NewV))
continue;
}
if (isa<Instruction>(CurUser)) {
// Otherwise, replaces the use with flat(NewV).
// TODO: Some optimization opportunities are missed. For example, in
// %0 = icmp eq float* %p, %q
@ -622,13 +753,14 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
BasicBlock::iterator InsertPos = std::next(I->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
} else {
U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
V->getType()));
U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
V->getType()));
}
}
}
if (V->use_empty())
RecursivelyDeleteTriviallyDeadInstructions(V);
}

View File

@ -0,0 +1,92 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; objectsize (i32 result) on a flat pointer that is a cast of a group
; (addrspace 3) pointer: the call is expected to be re-mangled to take the
; group pointer directly.
; CHECK-LABEL: @objectsize_group_to_flat_i32(
; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)
define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
%val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)
ret i32 %val
}
; objectsize (i64 result) on a flat pointer that is a cast of a global
; (addrspace 1) pointer: the call is expected to be re-mangled to take the
; global pointer directly. The pointer is in addrspace(1) so that this test
; covers the global address space rather than repeating the group case.
; CHECK-LABEL: @objectsize_global_to_flat_i64(
; CHECK: %val = call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* %global.ptr, i1 true)
define i64 @objectsize_global_to_flat_i64(i8 addrspace(1)* %global.ptr) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
%val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true)
ret i64 %val
}
; amdgcn.atomic.inc (i32) through a flat pointer cast from a global
; (addrspace 1) pointer: expected to become the p1i32 overload on the
; original pointer.
; CHECK-LABEL: @atomicinc_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y)
define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
ret i32 %ret
}
; amdgcn.atomic.inc (i32) through a flat pointer cast from a group
; (addrspace 3) pointer: expected to become the p3i32 overload on the
; original pointer, keeping the result name %ret.
; CHECK-LABEL: @atomicinc_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y)
define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
ret i32 %ret
}
; amdgcn.atomic.inc (i64) through a flat pointer cast from a global
; (addrspace 1) pointer: expected to become the p1i64 overload on the
; original pointer.
; CHECK-LABEL: @atomicinc_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
%cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
ret i64 %ret
}
; amdgcn.atomic.inc (i64) through a flat pointer cast from a group
; (addrspace 3) pointer: expected to become the p3i64 overload on the
; original pointer.
; CHECK-LABEL: @atomicinc_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
%cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
ret i64 %ret
}
; amdgcn.atomic.dec (i32) through a flat pointer cast from a global
; (addrspace 1) pointer: expected to become the p1i32 overload on the
; original pointer.
; CHECK-LABEL: @atomicdec_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val)
define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
ret i32 %ret
}
; amdgcn.atomic.dec (i32) through a flat pointer cast from a group
; (addrspace 3) pointer: expected to become the p3i32 overload on the
; original pointer, keeping the result name %ret.
; CHECK-LABEL: @atomicdec_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val)
define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
ret i32 %ret
}
; amdgcn.atomic.dec (i64) through a flat pointer cast from a global
; (addrspace 1) pointer: expected to become the p1i64 overload on the
; original pointer.
; CHECK-LABEL: @atomicdec_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
%cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
ret i64 %ret
}
; amdgcn.atomic.dec (i64) through a flat pointer cast from a group
; (addrspace 3) pointer: expected to become the p3i64 overload on the
; original pointer.
; CHECK-LABEL: @atomicdec_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
%cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
ret i64 %ret
}
declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1
declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1
declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }

View File

@ -0,0 +1,134 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Non-volatile memset through a flat pointer cast from a group (addrspace 3)
; pointer: expected to become the p3i8 overload on the original pointer, with
; the !tbaa, !alias.scope and !noalias metadata still attached.
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; Non-volatile memset through a flat pointer cast from a global (addrspace 1)
; pointer: expected to become the p1i8 overload on the original pointer, with
; the !tbaa, !alias.scope and !noalias metadata still attached.
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; Group (addrspace 3) memset with a variable size and no metadata on the
; original call: the rewritten call must end right after the argument list,
; i.e. no metadata may appear on it.
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
ret void
}
; Global (addrspace 1) memset with a variable size and no metadata on the
; original call: the rewritten call must end right after the argument list,
; i.e. no metadata may appear on it.
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
ret void
}
; memcpy whose source is a flat pointer cast from a group (addrspace 3)
; pointer while the destination is a genuine flat pointer: only the source
; operand is expected to be replaced, giving the p4i8.p3i8 overload with the
; metadata still attached.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memcpy whose destination is a flat pointer cast from a group (addrspace 3)
; pointer while the source is a genuine flat pointer: only the destination
; operand is expected to be replaced, giving the p3i8.p4i8 overload.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memcpy where both operands are flat pointers produced by two separate
; addrspacecasts: both operands are expected to be replaced, giving the
; p3i8.p3i8 overload.
; NOTE(review): both casts below take %src.group.ptr, so %dest.group.ptr is
; never used and the expected call names %src.group.ptr twice. This may be a
; copy-paste slip for %dest.group.ptr - confirm the intent before changing.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
%cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memcpy where the destination is cast from a group (addrspace 3) pointer and
; the source from a global (addrspace 1) pointer: each operand is expected to
; be replaced with its own original pointer, giving the p3i8.p1i8 overload.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memcpy whose source is already a group (addrspace 3) pointer and whose
; destination is a flat pointer cast from a global (addrspace 1) pointer,
; with an i32 length: only the destination is expected to change, giving the
; p1i8.p3i8.i32 overload.
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
%cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memcpy carrying only !tbaa.struct metadata: the source is expected to be
; replaced with the group (addrspace 3) pointer and the !tbaa.struct
; attachment is expected to survive on the rewritten call.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7
define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7
ret void
}
; memcpy without any metadata: the source is expected to be replaced with the
; group (addrspace 3) pointer and the rewritten call must end right after the
; argument list, i.e. no metadata may appear on it.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
ret void
}
; Two separate memcpy calls share the same cast source pointer: both calls
; are expected to be rewritten to read from the group (addrspace 3) pointer,
; each keeping its own flat destination.
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
ret void
}
; Check for iterator problems if the pointer has 2 uses in the same call.
; Here the same cast is both destination and source of one memcpy, so the
; value being replaced has two uses owned by a single user; both operands are
; expected to end up as the group (addrspace 3) pointer.
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; memmove whose source is a flat pointer cast from a group (addrspace 3)
; pointer: the source is expected to be replaced, giving the p4i8.p3i8
; overload with the !tbaa, !alias.scope and !noalias metadata still attached.
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
!0 = !{!1, !1, i64 0}
!1 = !{!"A", !2}
!2 = !{!"tbaa root"}
!3 = !{!"B", !2}
!4 = !{!5}
!5 = distinct !{!5, !6, !"some scope"}
!6 = distinct !{!6, !"some domain"}
!7 = !{i64 0, i64 8, null}

View File

@ -115,4 +115,26 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr,
ret { i32, i1 } %ret
}
attributes #0 = { nounwind }
; A volatile memset (last argument i1 true) on a flat pointer cast from a
; group (addrspace 3) pointer must not be rewritten: the addrspacecast is
; expected to remain and the call must still be the flat p4i8 overload. The
; cast shows up as the unnamed value %1 in the expected output, which is what
; the FIXME below refers to.
; FIXME: Shouldn't be losing names
; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
ret void
}
; A volatile memset (last argument i1 true) on a flat pointer cast from a
; global (addrspace 1) pointer must not be rewritten: the addrspacecast is
; expected to remain and the call must still be the flat p4i8 overload, taking
; the cast as the unnamed value %1.
; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
ret void
}
declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }