forked from OSchip/llvm-project
[SLP] Need to shrink the load vector after reordering.
After merging the shuffles, we can no longer rely on the previous shuffle and need to shrink the final shuffle, if it is required. Reported in D92668. Differential Revision: https://reviews.llvm.org/D93967
This commit is contained in:
parent
213329d7c6
commit
4284afdf94
|
@ -4260,18 +4260,13 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
|
||||||
if (E->isSame(VL)) {
|
if (E->isSame(VL)) {
|
||||||
Value *V = vectorizeTree(E);
|
Value *V = vectorizeTree(E);
|
||||||
if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) {
|
if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) {
|
||||||
// We need to get the vectorized value but without shuffle.
|
// Reshuffle to get only unique values.
|
||||||
if (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
|
SmallVector<int, 4> UniqueIdxs;
|
||||||
V = SV->getOperand(0);
|
SmallSet<int, 4> UsedIdxs;
|
||||||
} else {
|
for (int Idx : E->ReuseShuffleIndices)
|
||||||
// Reshuffle to get only unique values.
|
if (UsedIdxs.insert(Idx).second)
|
||||||
SmallVector<int, 4> UniqueIdxs;
|
UniqueIdxs.emplace_back(Idx);
|
||||||
SmallSet<int, 4> UsedIdxs;
|
V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
|
||||||
for (int Idx : E->ReuseShuffleIndices)
|
|
||||||
if (UsedIdxs.insert(Idx).second)
|
|
||||||
UniqueIdxs.emplace_back(Idx);
|
|
||||||
V = Builder.CreateShuffleVector(V, UniqueIdxs);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return V;
|
return V;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,8 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
|
||||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
|
||||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
|
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
|
; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
|
||||||
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
|
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
|
||||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5
|
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5
|
||||||
|
|
Loading…
Reference in New Issue