[SLP] Use the minimum alignment of the load bundle when forming a masked.gather

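Previously the alignment of the first load in the bundle was used. That is
correct when vectorizing contiguous loads, where the first load's address is
the address of the whole vector access, but not for gathers, where each lane
is loaded from its own pointer and may have a smaller alignment.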

Fixes a miscompile introduced in fcad8d3635.
This commit is contained in:
Benjamin Kramer 2020-11-18 12:50:24 +01:00
parent 680931af27
commit 4dbe12e866
2 changed files with 7 additions and 2 deletions

View File

@@ -4561,7 +4561,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
Value *VecPtr = vectorizeTree(E->getOperand(0));
NewLI = Builder.CreateMaskedGather(VecPtr, LI->getAlign());
// Use the minimum alignment of the gathered loads.
Align CommonAlignment = LI->getAlign();
for (Value *V : E->Scalars)
CommonAlignment =
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
NewLI = Builder.CreateMaskedGather(VecPtr, CommonAlignment);
}
Value *V = propagateMetadata(NewLI, E->Scalars);

View File

@@ -24,7 +24,7 @@ define void @foo() {
; SSE-NEXT: ret void
;
; AVX-LABEL: @foo(
; AVX-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
; AVX-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; AVX-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16
; AVX-NEXT: ret void