[LSV] Avoid adding vectors of pointers as candidates

Summary:
We no longer add vectors of pointers as candidates for
load/store vectorization. It does not seem to work anyway,
but without this patch we can hit assertion failures when
trying to create casts between an integer type and a
vector-of-pointers type.

The test case I've added used to assert like this when trying to
cast between i64 and <2 x i16*>:
opt: ../lib/IR/Instructions.cpp:2565: Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed.
#0 PrintStackTraceSignalHandler(void*)
#1 SignalHandler(int)
#2 __restore_rt
#3 __GI_raise
#4 __GI_abort
#5 __GI___assert_fail
#6 llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&, llvm::Instruction*)
#7 llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateBitOrPointerCast(llvm::Value*, llvm::Type*, llvm::Twine const&)
#8 Vectorizer::vectorizeStoreChain(llvm::ArrayRef<llvm::Instruction*>, llvm::SmallPtrSet<llvm::Instruction*, 16u>*)

Reviewers: arsenm

Reviewed By: arsenm

Subscribers: nhaehnle, llvm-commits

Differential Revision: https://reviews.llvm.org/D39296

llvm-svn: 316665
This commit is contained in:
Bjorn Pettersson 2017-10-26 13:59:15 +00:00
parent 22a2282da1
commit 86db068e39
2 changed files with 37 additions and 3 deletions

View File

@ -616,6 +616,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
if ((TySize % 8) != 0)
continue;
// Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
// functions currently use an integer type for the vectorized load/store,
// and do not support casting between the integer type and a vector of
// pointers (e.g. i64 to <2 x i16*>).
if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
continue;
Value *Ptr = LI->getPointerOperand();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
@ -646,6 +653,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
// Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
// functions currently use an integer type for the vectorized load/store,
// and do not support casting between the integer type and a vector of
// pointers (e.g. i64 to <2 x i16*>).
if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
continue;
// Skip weird non-byte sizes. They probably aren't worth the effort of
// handling correctly.
unsigned TySize = DL.getTypeSizeInBits(Ty);
@ -701,8 +715,8 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
SmallVector<int, 16> Heads, Tails;
int ConsecutiveChain[64];
// Do a quadratic search on all of the given stores and find all of the pairs
// of stores that follow each other.
// Do a quadratic search on all of the given loads/stores and find all of the
// pairs of loads/stores that follow each other.
for (int i = 0, e = Instrs.size(); i < e; ++i) {
ConsecutiveChain[i] = -1;
for (int j = e - 1; j >= 0; --j) {
@ -769,7 +783,7 @@ bool Vectorizer::vectorizeStoreChain(
SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
// If the vector has an int element, default to int for the whole store.
Type *StoreTy;
for (Instruction *I : Chain) {
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();

View File

@ -632,6 +632,26 @@ define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias
ret void
}
; Verify that we no longer hit asserts for this test case. No change expected.
; CHECK-LABEL: @copy_vec_of_ptrs
; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
; Regression input: copies two adjacent <2 x i16*> values from %in to %out,
; element 1 first and then element 0. Loads/stores of vectors of pointers such
; as these previously led the load/store vectorizer into an invalid-cast
; assertion; the pass is now expected to leave this function unchanged.
define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out,
<2 x i16*> addrspace(1)* %in ) #0 {
; Load element 1 and then element 0 of %in. Only the second load carries an
; explicit alignment.
%in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
%vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
%vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
; Store the two loaded values to the matching positions of %out, in the same
; order as the loads.
%out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
ret void
}
declare void @llvm.amdgcn.s.barrier() #1
attributes #0 = { nounwind }