forked from OSchip/llvm-project
InferAddressSpaces: Support memory intrinsics
llvm-svn: 293587
This commit is contained in:
parent
f63f58a28f
commit
6d5a8d48fd
|
@ -153,7 +153,15 @@ private:
|
|||
Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
|
||||
DenseSet<Value *> *Visited) const;
|
||||
|
||||
bool rewriteIntrinsicOperands(IntrinsicInst *II,
|
||||
Value *OldV, Value *NewV) const;
|
||||
void collectRewritableIntrinsicOperands(
|
||||
IntrinsicInst *II,
|
||||
std::vector<std::pair<Value *, bool>> *PostorderStack,
|
||||
DenseSet<Value *> *Visited) const;
|
||||
|
||||
std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
|
||||
|
||||
Value *cloneValueWithNewAddressSpace(
|
||||
Value *V, unsigned NewAddrSpace,
|
||||
const ValueToValueMapTy &ValueWithNewAddrSpace,
|
||||
|
@ -210,6 +218,47 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Move logic to TTI?
|
||||
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
|
||||
Value *OldV,
|
||||
Value *NewV) const {
|
||||
Module *M = II->getParent()->getParent()->getParent();
|
||||
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::objectsize:
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec: {
|
||||
Type *DestTy = II->getType();
|
||||
Type *SrcTy = NewV->getType();
|
||||
Function *NewDecl
|
||||
= Intrinsic::getDeclaration(M, II->getIntrinsicID(), { DestTy, SrcTy });
|
||||
II->setArgOperand(0, NewV);
|
||||
II->setCalledFunction(NewDecl);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Move logic to TTI?
|
||||
void InferAddressSpaces::collectRewritableIntrinsicOperands(
|
||||
IntrinsicInst *II,
|
||||
std::vector<std::pair<Value *, bool>> *PostorderStack,
|
||||
DenseSet<Value *> *Visited) const {
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::objectsize:
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec:
|
||||
appendsFlatAddressExpressionToPostorderStack(
|
||||
II->getArgOperand(0), PostorderStack, Visited);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns all flat address expressions in function F. The elements are
|
||||
// If V is an unvisited flat address expression, appends V to PostorderStack
|
||||
// and marks it as visited.
|
||||
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
|
||||
|
@ -224,7 +273,7 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
|
|||
}
|
||||
|
||||
// Returns all flat address expressions in function F. The elements are ordered
|
||||
// in postorder.
|
||||
// ordered in postorder.
|
||||
std::vector<Value *>
|
||||
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
|
||||
// This function implements a non-recursive postorder traversal of a partial
|
||||
|
@ -249,8 +298,15 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
|
|||
PushPtrOperand(RMW->getPointerOperand());
|
||||
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
|
||||
PushPtrOperand(CmpX->getPointerOperand());
|
||||
else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
|
||||
// For memset/memcpy/memmove, any pointer operand can be replaced.
|
||||
PushPtrOperand(MI->getRawDest());
|
||||
|
||||
// TODO: Support intrinsics
|
||||
// Handle 2nd operand for memcpy/memmove.
|
||||
if (auto *MTI = dyn_cast<MemTransferInst>(MI))
|
||||
PushPtrOperand(MTI->getRawSource());
|
||||
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
|
||||
collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
|
||||
}
|
||||
|
||||
std::vector<Value *> Postorder; // The resultant postorder.
|
||||
|
@ -560,6 +616,63 @@ static bool isSimplePointerUseValidToReplace(Use &U) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Update memory intrinsic uses that require more complex processing than
|
||||
/// simple memory instructions. Thse require re-mangling and may have multiple
|
||||
/// pointer operands.
|
||||
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI,
|
||||
Value *OldV, Value *NewV) {
|
||||
IRBuilder<> B(MI);
|
||||
MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
|
||||
MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
|
||||
MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
|
||||
|
||||
if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
|
||||
B.CreateMemSet(NewV, MSI->getValue(),
|
||||
MSI->getLength(), MSI->getAlignment(),
|
||||
false, // isVolatile
|
||||
TBAA, ScopeMD, NoAliasMD);
|
||||
} else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
|
||||
Value *Src = MTI->getRawSource();
|
||||
Value *Dest = MTI->getRawDest();
|
||||
|
||||
// Be careful in case this is a self-to-self copy.
|
||||
if (Src == OldV)
|
||||
Src = NewV;
|
||||
|
||||
if (Dest == OldV)
|
||||
Dest = NewV;
|
||||
|
||||
if (isa<MemCpyInst>(MTI)) {
|
||||
MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
|
||||
B.CreateMemCpy(Dest, Src, MTI->getLength(),
|
||||
MTI->getAlignment(),
|
||||
false, // isVolatile
|
||||
TBAA, TBAAStruct, ScopeMD, NoAliasMD);
|
||||
} else {
|
||||
assert(isa<MemMoveInst>(MTI));
|
||||
B.CreateMemMove(Dest, Src, MTI->getLength(),
|
||||
MTI->getAlignment(),
|
||||
false, // isVolatile
|
||||
TBAA, ScopeMD, NoAliasMD);
|
||||
}
|
||||
} else
|
||||
llvm_unreachable("unhandled MemIntrinsic");
|
||||
|
||||
MI->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Advance past every consecutive use that belongs to the same user as \p I.
///
/// \p I is dereferenced before it is compared against \p End, so it must
/// point at a valid use on entry.
///
/// \returns the first iterator after \p I whose user differs from the user
/// of \p I, or \p End if there is none.
static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  const User *const Skipped = I->getUser();

  // Always step at least once, then keep going while the user is unchanged.
  do {
    ++I;
  } while (I != End && I->getUser() == Skipped);

  return I;
}
|
||||
|
||||
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
|
||||
const std::vector<Value *> &Postorder,
|
||||
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
|
||||
|
@ -595,20 +708,38 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
|
|||
if (NewV == nullptr)
|
||||
continue;
|
||||
|
||||
SmallVector<Use *, 4> Uses;
|
||||
for (Use &U : V->uses())
|
||||
Uses.push_back(&U);
|
||||
|
||||
DEBUG(dbgs() << "Replacing the uses of " << *V
|
||||
<< "\n with\n " << *NewV << '\n');
|
||||
|
||||
for (Use *U : Uses) {
|
||||
if (isSimplePointerUseValidToReplace(*U)) {
|
||||
Value::use_iterator I, E, Next;
|
||||
for (I = V->use_begin(), E = V->use_end(); I != E; ) {
|
||||
Use &U = *I;
|
||||
|
||||
// Some users may see the same pointer operand in multiple operands. Skip
|
||||
// to the next instruction.
|
||||
I = skipToNextUser(I, E);
|
||||
|
||||
if (isSimplePointerUseValidToReplace(U)) {
|
||||
// If V is used as the pointer operand of a compatible memory operation,
|
||||
// sets the pointer operand to NewV. This replacement does not change
|
||||
// the element type, so the resultant load/store is still valid.
|
||||
U->set(NewV);
|
||||
} else if (isa<Instruction>(U->getUser())) {
|
||||
U.set(NewV);
|
||||
continue;
|
||||
}
|
||||
|
||||
User *CurUser = U.getUser();
|
||||
// Handle more complex cases like intrinsic that need to be remangled.
|
||||
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
|
||||
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
|
||||
if (rewriteIntrinsicOperands(II, V, NewV))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isa<Instruction>(CurUser)) {
|
||||
// Otherwise, replaces the use with flat(NewV).
|
||||
// TODO: Some optimization opportunities are missed. For example, in
|
||||
// %0 = icmp eq float* %p, %q
|
||||
|
@ -622,13 +753,14 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
|
|||
BasicBlock::iterator InsertPos = std::next(I->getIterator());
|
||||
while (isa<PHINode>(InsertPos))
|
||||
++InsertPos;
|
||||
U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
|
||||
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
|
||||
} else {
|
||||
U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
|
||||
V->getType()));
|
||||
U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
|
||||
V->getType()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (V->use_empty())
|
||||
RecursivelyDeleteTriviallyDeadInstructions(V);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @objectsize_group_to_flat_i32(
|
||||
; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)
|
||||
define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
|
||||
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
%val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
; The "global" variant uses addrspace(1), like the other *_global_to_flat
; tests in this file, so that it does not merely repeat the group
; (addrspace(3)) case above.
; CHECK-LABEL: @objectsize_global_to_flat_i64(
; CHECK: %val = call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* %global.ptr, i1 true)
define i64 @objectsize_global_to_flat_i64(i8 addrspace(1)* %global.ptr) #0 {
  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
  %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true)
  ret i64 %val
}
|
||||
|
||||
; CHECK-LABEL: @atomicinc_global_to_flat_i32(
|
||||
; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y)
|
||||
define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
|
||||
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicinc_group_to_flat_i32(
|
||||
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y)
|
||||
define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
|
||||
%ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicinc_global_to_flat_i64(
|
||||
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
|
||||
define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
|
||||
%cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
|
||||
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicinc_group_to_flat_i64(
|
||||
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
|
||||
define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
|
||||
%cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
|
||||
%ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicdec_global_to_flat_i32(
|
||||
; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val)
|
||||
define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
|
||||
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
|
||||
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicdec_group_to_flat_i32(
|
||||
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val)
|
||||
define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
|
||||
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
|
||||
%ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicdec_global_to_flat_i64(
|
||||
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
|
||||
define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
|
||||
%cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
|
||||
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @atomicdec_group_to_flat_i64(
|
||||
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
|
||||
define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
|
||||
%cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
|
||||
%ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1
|
||||
declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
|
||||
declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
|
||||
declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind argmemonly }
|
|
@ -0,0 +1,134 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @memset_group_to_flat(
|
||||
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memset_global_to_flat(
|
||||
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memset_group_to_flat_no_md(
|
||||
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
|
||||
define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
|
||||
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memset_global_to_flat_no_md(
|
||||
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
|
||||
define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
|
||||
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
|
||||
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
|
||||
; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
|
||||
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
|
||||
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
%cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
|
||||
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
|
||||
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
|
||||
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
|
||||
%cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
|
||||
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7
|
||||
define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
|
||||
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
|
||||
define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
|
||||
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
|
||||
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
|
||||
define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check for iterator problems if the pointer has 2 uses in the same call
|
||||
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
|
||||
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
|
||||
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
|
||||
; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
|
||||
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
|
||||
declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
|
||||
declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
|
||||
declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { argmemonly nounwind }
|
||||
|
||||
!0 = !{!1, !1, i64 0}
|
||||
!1 = !{!"A", !2}
|
||||
!2 = !{!"tbaa root"}
|
||||
!3 = !{!"B", !2}
|
||||
!4 = !{!5}
|
||||
!5 = distinct !{!5, !6, !"some scope"}
|
||||
!6 = distinct !{!6, !"some domain"}
|
||||
!7 = !{i64 0, i64 8, null}
|
|
@ -115,4 +115,26 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr,
|
|||
ret { i32, i1 } %ret
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
; FIXME: Shouldn't be losing names
|
||||
; CHECK-LABEL: @volatile_memset_group_to_flat(
|
||||
; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
|
||||
define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @volatile_memset_global_to_flat(
|
||||
; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
|
||||
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
|
||||
define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
|
||||
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
|
||||
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { argmemonly nounwind }
|
||||
|
|
Loading…
Reference in New Issue