forked from OSchip/llvm-project
[SLP] Fix for PR32086: Count InsertElementInstr of the same elements as shuffle.
Summary: If the same value is going to be vectorized several times in the same tree entry, this entry is considered to be a gather entry, and the cost of this gather is counted as the cost of InsertElementInstrs for each gathered value. But we can instead consider these elements as a ShuffleInstr with the SK_PermuteSingle shuffle kind. Reviewers: spatel, RKSimon, mkuper, hfinkel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38697 llvm-svn: 323662
This commit is contained in:
parent
10f5c9e765
commit
9c5c103283
File diff suppressed because it is too large
Load Diff
|
@ -4,15 +4,14 @@
|
||||||
define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
|
define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
|
||||||
; CHECK-LABEL: @i64_simplified(
|
; CHECK-LABEL: @i64_simplified(
|
||||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
|
||||||
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
|
||||||
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
|
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
|
||||||
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
|
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
|
||||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
|
||||||
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
|
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
|
||||||
; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
|
||||||
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX3]], align 8
|
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
|
||||||
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
|
|
||||||
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
|
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
|
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
|
||||||
|
@ -64,16 +63,16 @@ define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
|
||||||
define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
|
define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
|
||||||
; CHECK-LABEL: @i64_simplifiedi_extract(
|
; CHECK-LABEL: @i64_simplifiedi_extract(
|
||||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
|
||||||
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
|
||||||
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
|
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
|
||||||
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
|
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
|
||||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
|
||||||
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
|
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
|
||||||
; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
|
||||||
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
|
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
|
||||||
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 3
|
||||||
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
|
; CHECK-NEXT: store i64 [[TMP4]], i64* [[LD]], align 8
|
||||||
; CHECK-NEXT: store i64 [[T1]], i64* [[LD]], align 8
|
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
|
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
|
||||||
|
|
|
@ -137,17 +137,19 @@ define i8 @k(<4 x i8> %x) {
|
||||||
|
|
||||||
define i8 @k_bb(<4 x i8> %x) {
|
define i8 @k_bb(<4 x i8> %x) {
|
||||||
; CHECK-LABEL: @k_bb(
|
; CHECK-LABEL: @k_bb(
|
||||||
|
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
|
||||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||||
; CHECK: bb1:
|
; CHECK: bb1:
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
|
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
|
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
|
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
|
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X]], [[X]]
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
|
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X0X0]], [[X3X3]]
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
|
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
|
||||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
|
||||||
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
|
; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP3]], [[TMP4]]
|
||||||
; CHECK-NEXT: ret i8 [[TMP8]]
|
; CHECK-NEXT: [[TMP6:%.*]] = sdiv i8 [[TMP2]], [[TMP5]]
|
||||||
|
; CHECK-NEXT: ret i8 [[TMP6]]
|
||||||
;
|
;
|
||||||
%x0 = extractelement <4 x i8> %x, i32 0
|
%x0 = extractelement <4 x i8> %x, i32 0
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
|
@ -16,19 +16,18 @@ target triple = "i386-apple-macosx10.9.0"
|
||||||
define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
|
define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
|
||||||
; CHECK-LABEL: @foo(
|
; CHECK-LABEL: @foo(
|
||||||
; CHECK-NEXT: entry:
|
; CHECK-NEXT: entry:
|
||||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
|
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[K]], i32 3
|
|
||||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||||
; CHECK: for.body:
|
; CHECK: for.body:
|
||||||
; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ]
|
; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]]
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
|
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]]
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]]
|
||||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
|
||||||
; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
|
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
|
||||||
; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4
|
; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000
|
||||||
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
||||||
|
|
Loading…
Reference in New Issue