[SLP]Need shrink the load vector after reordering.

After merging the shuffles, we cannot rely on the previous shuffle
anymore and need to shrink the final shuffle, if it is required.

Reported in D92668

Differential Revision: https://reviews.llvm.org/D93967
This commit is contained in:
Alexey Bataev 2021-01-01 08:43:33 -08:00
parent 213329d7c6
commit 4284afdf94
2 changed files with 9 additions and 13 deletions

View File

@ -4260,18 +4260,13 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
if (E->isSame(VL)) { if (E->isSame(VL)) {
Value *V = vectorizeTree(E); Value *V = vectorizeTree(E);
if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) { if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) {
// We need to get the vectorized value but without shuffle. // Reshuffle to get only unique values.
if (auto *SV = dyn_cast<ShuffleVectorInst>(V)) { SmallVector<int, 4> UniqueIdxs;
V = SV->getOperand(0); SmallSet<int, 4> UsedIdxs;
} else { for (int Idx : E->ReuseShuffleIndices)
// Reshuffle to get only unique values. if (UsedIdxs.insert(Idx).second)
SmallVector<int, 4> UniqueIdxs; UniqueIdxs.emplace_back(Idx);
SmallSet<int, 4> UsedIdxs; V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
for (int Idx : E->ReuseShuffleIndices)
if (UsedIdxs.insert(Idx).second)
UniqueIdxs.emplace_back(Idx);
V = Builder.CreateShuffleVector(V, UniqueIdxs);
}
} }
return V; return V;
} }

View File

@ -10,7 +10,8 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1> ; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5