[LSV] Avoid adding vectors of pointers as candidates

Summary: We no longer add vectors of pointers as candidates for load/store vectorization. Vectorizing them does not seem to work anyway, and without this patch we can hit assertion failures when trying to create casts between an integer type and a vector-of-pointers type. The test case I've added used to assert like this when trying to cast between i64 and <2 x i16*>: opt: ../lib/IR/Instructions.cpp:2565: Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed. #0 PrintStackTraceSignalHandler(void*) #1 SignalHandler(int) #2 __restore_rt #3 __GI_raise #4 __GI_abort #5 __GI___assert_fail #6 llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&, llvm::Instruction*) #7 llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateBitOrPointerCast(llvm::Value*, llvm::Type*, llvm::Twine const&) #8 Vectorizer::vectorizeStoreChain(llvm::ArrayRef<llvm::Instruction*>, llvm::SmallPtrSet<llvm::Instruction*, 16u>*) Reviewers: arsenm Reviewed By: arsenm Subscribers: nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D39296 llvm-svn: 316665
2017-10-26 13:59:15 +00:00 · 2017-10-26 13:59:15 +00:00 · 86db068e39
parent 22a2282da1
commit 86db068e39
2 changed files with 37 additions and 3 deletions
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@ -616,6 +616,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
      if ((TySize % 8) != 0)
        continue;

+      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+      // functions currently use an integer type for the vectorized
+      // load/store, and do not support casting between the integer type and a
+      // vector of pointers (e.g. i64 to <2 x i16*>).
+      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+        continue;
+
      Value *Ptr = LI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
@ -646,6 +653,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

+      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+      // functions currently use an integer type for the vectorized
+      // load/store, and do not support casting between the integer type and a
+      // vector of pointers (e.g. i64 to <2 x i16*>).
+      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+        continue;
+
      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
@ -701,8 +715,8 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
  SmallVector<int, 16> Heads, Tails;
  int ConsecutiveChain[64];

-  // Do a quadratic search on all of the given stores and find all of the pairs
-  // of stores that follow each other.
+  // Do a quadratic search on all of the given loads/stores and find all of the
+  // pairs of loads/stores that follow each other.
  for (int i = 0, e = Instrs.size(); i < e; ++i) {
    ConsecutiveChain[i] = -1;
    for (int j = e - 1; j >= 0; --j) {
@ -769,7 +783,7 @@ bool Vectorizer::vectorizeStoreChain(
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  StoreInst *S0 = cast<StoreInst>(Chain[0]);

-  // If the vector has an int element, default to int for the whole load.
+  // If the vector has an int element, default to int for the whole store.
  Type *StoreTy;
  for (Instruction *I : Chain) {
    StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@ -632,6 +632,26 @@ define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias
  ret void
 }

+; Verify that we no longer hit asserts for this test case. No change expected.
+; CHECK-LABEL: @copy_vec_of_ptrs
+; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out,
+                                            <2 x i16*> addrspace(1)* %in ) #0 {
+  %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+  %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+  %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+
+  %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+  store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+  store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+  ret void
+}
+
 declare void @llvm.amdgcn.s.barrier() #1

 attributes #0 = { nounwind }