diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index ceadb8d06343..e0438b037c03 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -579,7 +579,15 @@ bool Vectorizer::vectorizeInstructions(ArrayRef Instrs) { bool Vectorizer::vectorizeStoreChain(ArrayRef Chain) { StoreInst *S0 = cast(Chain[0]); - Type *StoreTy = S0->getValueOperand()->getType(); + + // If the chain has an int element, default to int for the whole store. + Type *StoreTy; + for (const auto &V : Chain) { + StoreTy = cast(V)->getValueOperand()->getType(); + if (StoreTy->isIntOrIntVectorTy()) + break; + } + unsigned Sz = DL.getTypeSizeInBits(StoreTy); unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); @@ -700,7 +708,15 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef Chain) { bool Vectorizer::vectorizeLoadChain(ArrayRef Chain) { LoadInst *L0 = cast(Chain[0]); - Type *LoadTy = L0->getType(); + + // If the chain has an int element, default to int for the whole load. 
+ Type *LoadTy; + for (const auto &V : Chain) { + LoadTy = cast(V)->getType(); + if (LoadTy->isIntOrIntVectorTy()) + break; + } + unsigned Sz = DL.getTypeSizeInBits(LoadTy); unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index 2a4015d915eb..57aa5ef6cefd 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -140,7 +140,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { } ; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32 -; CHECK: store <4 x float> , <4 x float> addrspace(1)* %{{[0-9]+}} +; CHECK: store <4 x i32> , <4 x i32> addrspace(1)* %{{[0-9]+$}} define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2