[SLP] Fix emission of the shrink shuffles.

The shrink shuffle needs to follow the order of the reused scalars given by
the ReuseShuffleIndices mask rather than relying on their natural order.

Differential Revision: https://reviews.llvm.org/D111898
This commit is contained in:
Alexey Bataev 2021-10-15 09:47:50 -07:00
parent 313c657fce
commit b9cfa016da
2 changed files with 7 additions and 6 deletions

View File

@ -5669,17 +5669,18 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
// block: // block:
// %phi = phi <2 x > { .., %entry} {%shuffle, %block} // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
// %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1> // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
// ... (use %2) // ... (use %2)
// %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2} // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block // br %block
SmallVector<int> UniqueIdxs; SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
SmallSet<int, 4> UsedIdxs; SmallSet<int, 4> UsedIdxs;
int Pos = 0; int Pos = 0;
int Sz = VL.size(); int Sz = VL.size();
for (int Idx : E->ReuseShuffleIndices) { for (int Idx : E->ReuseShuffleIndices) {
if (Idx != Sz && UsedIdxs.insert(Idx).second) if (Idx != Sz && Idx != UndefMaskElem &&
UniqueIdxs.emplace_back(Pos); UsedIdxs.insert(Idx).second)
UniqueIdxs[Idx] = Pos;
++Pos; ++Pos;
} }
assert(VF >= UsedIdxs.size() && "Expected vectorization factor " assert(VF >= UsedIdxs.size() && "Expected vectorization factor "

View File

@ -23,7 +23,7 @@ define void @foo(%class.e* %this, %struct.a* %p, i32 %add7) {
; CHECK: sw.bb: ; CHECK: sw.bb:
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G]] to <2 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G]] to <2 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> ; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1> ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK-NEXT: br label [[SW_EPILOG]]