diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 54be21d40831..91c872d709c0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// // +// \file // Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when // the size is large or is not a compile-time constant. // @@ -19,7 +20,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -57,47 +57,52 @@ struct NVPTXLowerAggrCopies : public FunctionPass { char NVPTXLowerAggrCopies::ID = 0; // Lower memcpy to loop. -void convertMemCpyToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, - Value *len, bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { - Type *indType = len->getType(); +void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB, OrigBB->getTerminator()); - // srcAddr and dstAddr are expected to be pointer types, + // SrcAddr and DstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = cast(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) - srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); - dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS)); + SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); + DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - IRBuilder<> loop(loopBB); - // The loop index (ind) is a phi node. - PHINode *ind = loop.CreatePHI(indType, 0); - // Incoming value for ind is 0 - ind->addIncoming(ConstantInt::get(indType, 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - // load from srcAddr+ind + // load from SrcAddr+LoopIndex // TODO: we can leverage the align parameter of llvm.memcpy for more efficient // word-sized loads and stores. - Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), - srcVolatile); - // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), - dstVolatile); + Value *Element = LoopBuilder.CreateLoad( + LoopBuilder.CreateGEP(LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); + // store at DstAddr+LoopIndex + LoopBuilder.CreateStore( + Element, + LoopBuilder.CreateGEP(LoopBuilder.getInt8Ty(), DstAddr, LoopIndex), + DstIsVolatile); - // The value for ind coming from backedge is (ind + 1) - Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); - ind->addIncoming(newind, loopBB); + // The value for LoopIndex coming from backedge is (LoopIndex + 1) + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } // Lower memmove to IR. memmove is required to correctly copy overlapping memory @@ -122,11 +127,12 @@ void convertMemCpyToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, // } // return dst; // } -void convertMemMoveToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, - Value *len, bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { - Type *TypeOfLen = len->getType(); - BasicBlock *OrigBB = splitAt->getParent(); +void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = ConvertedInst->getParent(); // Create the a comparison of src and dst, based on which we jump to either // the forward-copy part of the function (if src >= dst) or the backwards-copy @@ -134,10 +140,11 @@ void convertMemMoveToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else // structure. Its block terminators (unconditional branches) are replaced by // the appropriate conditional branches when the loop is built. - ICmpInst *PtrCompare = new ICmpInst(splitAt, ICmpInst::ICMP_ULT, srcAddr, - dstAddr, "compare_src_dst"); + ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); TerminatorInst *ThenTerm, *ElseTerm; - SplitBlockAndInsertIfThenElse(PtrCompare, splitAt, &ThenTerm, &ElseTerm); + SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm, + &ElseTerm); // Each part of the function consists of two blocks: // copy_backwards: used to skip the loop when n == 0 @@ -148,31 +155,31 @@ void convertMemMoveToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, CopyBackwardsBB->setName("copy_backwards"); BasicBlock *CopyForwardBB = ElseTerm->getParent(); CopyForwardBB->setName("copy_forward"); - BasicBlock *ExitBB = splitAt->getParent(); + BasicBlock *ExitBB = ConvertedInst->getParent(); ExitBB->setName("memmove_done"); // Initial comparison of n == 0 that lets us skip the loops altogether. Shared // between both backwards and forward copy clauses. ICmpInst *CompareN = - new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, len, - ConstantInt::get(TypeOfLen, 0), "compare_n_to_0"); + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); // Copying backwards. BasicBlock *LoopBB = BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB); IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfLen, 0); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); Value *IndexPtr = LoopBuilder.CreateSub( - LoopPhi, ConstantInt::get(TypeOfLen, 1), "index_ptr"); + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); Value *Element = LoopBuilder.CreateLoad( - LoopBuilder.CreateInBoundsGEP(srcAddr, IndexPtr), "element"); + LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); LoopBuilder.CreateStore(Element, - LoopBuilder.CreateInBoundsGEP(dstAddr, IndexPtr)); + LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); LoopBuilder.CreateCondBr( - LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfLen, 0)), + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), ExitBB, LoopBB); LoopPhi->addIncoming(IndexPtr, LoopBB); - LoopPhi->addIncoming(len, CopyBackwardsBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); ThenTerm->eraseFromParent(); @@ -180,52 +187,57 @@ void convertMemMoveToLoop(Instruction *splitAt, Value *srcAddr, Value *dstAddr, BasicBlock *FwdLoopBB = BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB); IRBuilder<> FwdLoopBuilder(FwdLoopBB); - PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfLen, 0, "index_ptr"); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); Value *FwdElement = FwdLoopBuilder.CreateLoad( - FwdLoopBuilder.CreateInBoundsGEP(srcAddr, FwdCopyPhi), "element"); + FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); FwdLoopBuilder.CreateStore( - FwdElement, FwdLoopBuilder.CreateInBoundsGEP(dstAddr, FwdCopyPhi)); + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( - FwdCopyPhi, ConstantInt::get(TypeOfLen, 1), "index_increment"); - FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, len), + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), ExitBB, FwdLoopBB); FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); - FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfLen, 0), CopyForwardBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); ElseTerm->eraseFromParent(); } // Lower memset to loop. -void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, Value *len, - Value *val, LLVMContext &Context, Function &F) { - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); +void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, + Value *CopyLen, Value *SetValue, LLVMContext &Context, + Function &F) { + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); - - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB, OrigBB->getTerminator()); // Cast pointer to the type of value getting stored - dstAddr = - builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); + unsigned dstAS = cast(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); - IRBuilder<> loop(loopBB); - PHINode *ind = loop.CreatePHI(len->getType(), 0); - ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); + LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); - loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); + LoopBuilder.CreateStore( + SetValue, LoopBuilder.CreateGEP(SetValue->getType(), DstAddr, LoopIndex), + false); - Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); - ind->addIncoming(newind, loopBB); + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { - SmallVector aggrLoads; + SmallVector AggrLoads; SmallVector MemCalls; const DataLayout &DL = F.getParent()->getDataLayout(); @@ -235,18 +247,17 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { - if (LoadInst *load = dyn_cast(II)) { - if (!load->hasOneUse()) + if (LoadInst *LI = dyn_cast(II)) { + if (!LI->hasOneUse()) continue; - if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) continue; - User *use = load->user_back(); - if (StoreInst *store = dyn_cast(use)) { - if (store->getOperand(0) != load) + if (StoreInst *SI = dyn_cast(LI->user_back())) { + if (SI->getOperand(0) != LI) continue; - aggrLoads.push_back(load); + AggrLoads.push_back(LI); } } else if (MemIntrinsic *IntrCall = dyn_cast(II)) { // Convert intrinsic calls with variable size or with constant size @@ -262,55 +273,60 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { } } - if (aggrLoads.size() == 0 && MemCalls.size() == 0) { + if (AggrLoads.size() == 0 && MemCalls.size() == 0) { return false; } // // Do the transformation of an aggr load/copy/set to a loop // - for (LoadInst *load : aggrLoads) { - StoreInst *store = dyn_cast(*load->user_begin()); - Value *srcAddr = load->getOperand(0); - Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL.getTypeStoreSize(load->getType()); - Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); + for (LoadInst *LI : AggrLoads) { + StoreInst *SI = dyn_cast(*LI->user_begin()); + Value *SrcAddr = LI->getOperand(0); + Value *DstAddr = SI->getOperand(1); + unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); + Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - convertMemCpyToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), - store->isVolatile(), Context, F); + convertMemCpyToLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), + /* Context */ Context, + /* Function F */ F); - store->eraseFromParent(); - load->eraseFromParent(); + SI->eraseFromParent(); + LI->eraseFromParent(); } // Transform mem* intrinsic calls. for (MemIntrinsic *MemCall : MemCalls) { if (MemCpyInst *Memcpy = dyn_cast(MemCall)) { - convertMemCpyToLoop(/* splitAt */ Memcpy, - /* srcAddr */ Memcpy->getRawSource(), - /* dstAddr */ Memcpy->getRawDest(), - /* len */ Memcpy->getLength(), - /* srcVolatile */ Memcpy->isVolatile(), - /* dstVolatile */ Memcpy->isVolatile(), + convertMemCpyToLoop(/* ConvertedInst */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), /* Context */ Context, /* Function F */ F); } else if (MemMoveInst *Memmove = dyn_cast(MemCall)) { - convertMemMoveToLoop(/* splitAt */ Memmove, - /* srcAddr */ Memmove->getRawSource(), - /* dstAddr */ Memmove->getRawDest(), - /* len */ Memmove->getLength(), - /* srcVolatile */ Memmove->isVolatile(), - /* dstVolatile */ Memmove->isVolatile(), + convertMemMoveToLoop(/* ConvertedInst */ Memmove, + /* SrcAddr */ Memmove->getRawSource(), + /* DstAddr */ Memmove->getRawDest(), + /* CopyLen */ Memmove->getLength(), + /* SrcIsVolatile */ Memmove->isVolatile(), + /* DstIsVolatile */ Memmove->isVolatile(), /* Context */ Context, /* Function F */ F); } else if (MemSetInst *Memset = dyn_cast(MemCall)) { - convertMemSetToLoop(/* splitAt */ Memset, - /* dstAddr */ Memset->getRawDest(), - /* len */ Memset->getLength(), - /* val */ Memset->getValue(), + convertMemSetToLoop(/* ConvertedInst */ Memset, + /* DstAddr */ Memset->getRawDest(), + /* CopyLen */ Memset->getLength(), + /* SetValue */ Memset->getValue(), /* Context */ Context, - /* F */ F); + /* Function F */ F); } MemCall->eraseFromParent(); }