forked from OSchip/llvm-project
[SLP] Fix insert point for reused extract instructions.
Summary: Reworked the previously committed patch to insert shuffles for reused extract element instructions in the correct position. The previous logic was incorrect, and might lead to a crash with PHIs and EH instructions. Reviewers: efriedma, javed.absar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D50143 llvm-svn: 339166
This commit is contained in:
parent
f4f5b7eea3
commit
0edcd0278d
|
@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
}
|
||||
if (NeedToShuffleReuses) {
|
||||
// TODO: Merge this shuffle with the ReorderShuffleMask.
|
||||
if (!E->ReorderIndices.empty())
|
||||
if (E->ReorderIndices.empty())
|
||||
Builder.SetInsertPoint(VL0);
|
||||
else if (auto *I = dyn_cast<Instruction>(V))
|
||||
Builder.SetInsertPoint(I->getParent(),
|
||||
std::next(I->getIterator()));
|
||||
else
|
||||
Builder.SetInsertPoint(&F->getEntryBlock(),
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
|
||||
E->ReuseShuffleIndices, "shuffle");
|
||||
}
|
||||
|
|
|
@ -27,3 +27,98 @@ define void @f1(<2 x i16> %x, i16* %a) {
|
|||
store i16 %t2, i16* %ptr3
|
||||
ret void
|
||||
}
|
||||
|
||||
; f2: both lanes of the PHI vector %xx are extracted once and then stored in
; the lane order 0,1,1,0 to four consecutive i16 slots. The CHECK lines expect
; a single reuse shuffle with mask <0, 1, 1, 0> placed directly after the PHIs
; in %cont, feeding one <4 x i16> store; the extra scalar store to %a is
; expected to read lane 0 back out of that shuffle.
define void @f2(<2 x i16> %x, i16* %a) {
|
||||
; CHECK-LABEL: @f2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[CONT:%.*]]
|
||||
; CHECK: cont:
|
||||
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
|
||||
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
|
||||
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
|
||||
; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %cont
|
||||
|
||||
cont: ; preds = %entry, %cont
|
||||
%xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
|
||||
%aa = phi i16* [ %a, %entry ], [ undef, %cont ]
|
||||
%t2 = extractelement <2 x i16> %xx, i32 0
|
||||
%t3 = extractelement <2 x i16> %xx, i32 1
|
||||
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
store i16 %t2, i16* %a
|
||||
; Lane order of the four consecutive stores below: %t2, %t3, %t3, %t2.
store i16 %t2, i16* %ptr0
|
||||
store i16 %t3, i16* %ptr1
|
||||
store i16 %t3, i16* %ptr2
|
||||
store i16 %t2, i16* %ptr3
|
||||
%a_val = load i16, i16* %a, align 2
|
||||
%cmp = icmp eq i16 %a_val, 0
|
||||
br i1 %cmp, label %cont, label %exit
|
||||
|
||||
exit: ; preds = %cont
|
||||
ret void
|
||||
}
|
||||
|
||||
; f3: both lanes of the PHI vector %xx are extracted once and then stored in
; the lane order 1,0,0,1 to four consecutive i16 slots. The CHECK lines expect
; a reorder shuffle with mask <1, 0> on the PHI vector followed by a reuse
; shuffle with mask <0, 1, 1, 0>, both placed directly after the PHIs in %cont
; and before the getelementptrs; the extra scalar store to %a is expected to
; read lane 0 back out of the reuse shuffle.
define void @f3(<2 x i16> %x, i16* %a) {
|
||||
; CHECK-LABEL: @f3(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[CONT:%.*]]
|
||||
; CHECK: cont:
|
||||
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
|
||||
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
|
||||
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
|
||||
; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %cont
|
||||
|
||||
cont: ; preds = %entry, %cont
|
||||
%xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
|
||||
%aa = phi i16* [ %a, %entry ], [ undef, %cont ]
|
||||
%t2 = extractelement <2 x i16> %xx, i32 0
|
||||
%t3 = extractelement <2 x i16> %xx, i32 1
|
||||
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
|
||||
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
|
||||
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
|
||||
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
|
||||
store i16 %t3, i16* %a
|
||||
; Lane order of the four consecutive stores below: %t3, %t2, %t2, %t3.
store i16 %t3, i16* %ptr0
|
||||
store i16 %t2, i16* %ptr1
|
||||
store i16 %t2, i16* %ptr2
|
||||
store i16 %t3, i16* %ptr3
|
||||
%a_val = load i16, i16* %a, align 2
|
||||
%cmp = icmp eq i16 %a_val, 0
|
||||
br i1 %cmp, label %cont, label %exit
|
||||
|
||||
exit: ; preds = %cont
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue