[SLP] Use the minimum alignment of the load bundle when forming a masked.gather

Instead of the first load. That works when vectorizing contiguous loads, but not for gathers. Fixes a miscompile introduced in fcad8d3635.
2020-11-18 12:50:24 +01:00 · 2020-11-18 12:50:24 +01:00 · 4dbe12e866
parent 680931af27
commit 4dbe12e866
2 changed files with 7 additions and 2 deletions
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -4561,7 +4561,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      } else {
        assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
        Value *VecPtr = vectorizeTree(E->getOperand(0));
-        NewLI = Builder.CreateMaskedGather(VecPtr, LI->getAlign());
+        // Use the minimum alignment of the gathered loads.
+        Align CommonAlignment = LI->getAlign();
+        for (Value *V : E->Scalars)
+          CommonAlignment =
+              commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+        NewLI = Builder.CreateMaskedGather(VecPtr, CommonAlignment);
      }
      Value *V = propagateMetadata(NewLI, E->Scalars);

--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
@ -24,7 +24,7 @@ define void @foo() {
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @foo(
-; AVX-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+; AVX-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
 ; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
 ; AVX-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16
 ; AVX-NEXT:    ret void