[SLP] Fix PR38339: Instruction does not dominate all uses!

Summary:
If the ExtractElement instructions can be optimized out during the
vectorization and we need to reshuffle the parent vector, this
ShuffleInstruction may be inserted in the wrong place causing compiler
to produce incorrect code.

Reviewers: spatel, RKSimon, mkuper, hfinkel, javed.absar

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D49928

llvm-svn: 338380
This commit is contained in:
Alexey Bataev 2018-07-31 14:02:43 +00:00
parent 7a49ce4ac4
commit c0c3a6ed5e
2 changed files with 35 additions and 0 deletions

View File

@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (!E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
else if (auto *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I->getParent(),
std::next(I->getIterator()));
else
Builder.SetInsertPoint(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}

View File

@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
define void @f1(<2 x i16> %x, i16* %a) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
; CHECK-NEXT: ret void
;
%t2 = extractelement <2 x i16> %x, i32 0
%t3 = extractelement <2 x i16> %x, i32 1
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
store i16 %t2, i16* %a
store i16 %t2, i16* %ptr0
store i16 %t3, i16* %ptr1
store i16 %t3, i16* %ptr2
store i16 %t2, i16* %ptr3
ret void
}