forked from OSchip/llvm-project
[SLP] Use the minimum alignment of the load bundle when forming a masked.gather
Previously the alignment of the first load in the bundle was used. That works
when vectorizing contiguous loads, but not for gathers, where each gathered
load can have a different alignment.
Fixes a miscompile introduced in fcad8d3635.
This commit is contained in:
parent
680931af27
commit
4dbe12e866
|
@ -4561,7 +4561,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
} else {
|
||||
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
|
||||
Value *VecPtr = vectorizeTree(E->getOperand(0));
|
||||
NewLI = Builder.CreateMaskedGather(VecPtr, LI->getAlign());
|
||||
// Use the minimum alignment of the gathered loads.
|
||||
Align CommonAlignment = LI->getAlign();
|
||||
for (Value *V : E->Scalars)
|
||||
CommonAlignment =
|
||||
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
|
||||
NewLI = Builder.CreateMaskedGather(VecPtr, CommonAlignment);
|
||||
}
|
||||
Value *V = propagateMetadata(NewLI, E->Scalars);
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ define void @foo() {
|
|||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @foo(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
|
||||
; AVX-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> <i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2)>, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
|
||||
; AVX-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16
|
||||
; AVX-NEXT: ret void
|
||||
|
|
Loading…
Reference in New Issue