forked from OSchip/llvm-project
Revert "[SLP]Improve graph reordering."
This reverts commite408d1dfab
and 2 other (4b25c11321
andc2deb2afaf
) related to fix the problem with the reordering shuffles.
This commit is contained in:
parent
d2b2ab4e1c
commit
7d9d926a18
File diff suppressed because it is too large
Load Diff
|
@ -1,32 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define void @foo() local_unnamed_addr {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load volatile double, double* poison, align 8
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load volatile double, double* poison, align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load volatile double, double* poison, align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load volatile double, double* poison, align 8
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[D30_0734:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[D01_0733:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[D11_0732:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP2]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[D21_0731:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: br label [[FOR_BODY]]
|
||||
;
|
||||
entry:
|
||||
%0 = load volatile double, double* poison, align 8
|
||||
%1 = load volatile double, double* poison, align 8
|
||||
%2 = load volatile double, double* poison, align 8
|
||||
%3 = load volatile double, double* poison, align 8
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%d30.0734 = phi double [ undef, %for.body ], [ %0, %entry ]
|
||||
%d01.0733 = phi double [ undef, %for.body ], [ %1, %entry ]
|
||||
%d11.0732 = phi double [ undef, %for.body ], [ %2, %entry ]
|
||||
%d21.0731 = phi double [ undef, %for.body ], [ %3, %entry ]
|
||||
br label %for.body
|
||||
}
|
|
@ -1,103 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; ModuleID = 'repro1.ll'
|
||||
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s
|
||||
|
||||
define i32 @foo() {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[FOR_COND15_PREHEADER:%.*]]
|
||||
; CHECK: for.cond15.preheader:
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]]
|
||||
; CHECK: for.cond15:
|
||||
; CHECK-NEXT: br label [[IF_END_1:%.*]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: br label [[FOR_COND15:%.*]]
|
||||
; CHECK: for.end39:
|
||||
; CHECK-NEXT: switch i32 undef, label [[DO_BODY:%.*]] [
|
||||
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
|
||||
; CHECK-NEXT: i32 1, label [[SW_BB195:%.*]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: sw.bb:
|
||||
; CHECK-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
|
||||
; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
|
||||
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
|
||||
; CHECK-NEXT: br label [[SW_EPILOG:%.*]]
|
||||
; CHECK: sw.bb195:
|
||||
; CHECK-NEXT: br label [[SW_EPILOG]]
|
||||
; CHECK: do.body:
|
||||
; CHECK-NEXT: unreachable
|
||||
; CHECK: sw.epilog:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
|
||||
; CHECK-NEXT: ret i32 undef
|
||||
; CHECK: if.end.1:
|
||||
; CHECK-NEXT: br label [[FOR_COND15_1:%.*]]
|
||||
; CHECK: for.cond15.1:
|
||||
; CHECK-NEXT: br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
|
||||
;
|
||||
entry:
|
||||
%conv = sitofp i32 undef to double
|
||||
%conv2 = sitofp i32 undef to double
|
||||
br label %for.cond15.preheader
|
||||
|
||||
for.cond15.preheader: ; preds = %for.cond15.1, %entry
|
||||
br label %if.end
|
||||
|
||||
for.cond15: ; preds = %if.end
|
||||
br label %if.end.1
|
||||
|
||||
if.end: ; preds = %for.cond15.preheader
|
||||
br label %for.cond15
|
||||
|
||||
for.end39: ; preds = %for.cond15.1
|
||||
switch i32 undef, label %do.body [
|
||||
i32 0, label %sw.bb
|
||||
i32 1, label %sw.bb195
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %for.end39
|
||||
%arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
|
||||
%0 = load double, double* %arrayidx43, align 8
|
||||
%arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
|
||||
%1 = load double, double* %arrayidx45, align 8
|
||||
%arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
|
||||
%2 = load double, double* %arrayidx51, align 8
|
||||
%arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
|
||||
%3 = load double, double* %arrayidx58, align 8
|
||||
%mul = fmul double undef, %conv2
|
||||
%mul109 = fmul double undef, %conv
|
||||
%mul143 = fmul double %0, %mul
|
||||
%4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
|
||||
%mul154 = fmul double %1, %mul109
|
||||
%5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
|
||||
%mul172 = fmul double %3, %mul
|
||||
%6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
|
||||
%mul183 = fmul double %2, %mul109
|
||||
%7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb195: ; preds = %for.end39
|
||||
br label %sw.epilog
|
||||
|
||||
do.body: ; preds = %for.end39
|
||||
unreachable
|
||||
|
||||
sw.epilog: ; preds = %sw.bb195, %sw.bb
|
||||
%x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
|
||||
%x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
|
||||
%x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
|
||||
%x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
|
||||
ret i32 undef
|
||||
|
||||
if.end.1: ; preds = %for.cond15
|
||||
br label %for.cond15.1
|
||||
|
||||
for.cond15.1: ; preds = %if.end.1
|
||||
br i1 undef, label %for.end39, label %for.cond15.preheader
|
||||
}
|
||||
|
||||
declare double @llvm.fmuladd.f64(double, double, double)
|
|
@ -6,14 +6,16 @@ target triple = "aarch64--linux-gnu"
|
|||
|
||||
define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v2i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 1, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[V0:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[V1:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP9]]
|
||||
;
|
||||
%v0.0 = extractelement <2 x i64> %v0, i32 0
|
||||
%v0.1 = extractelement <2 x i64> %v0, i32 1
|
||||
|
@ -72,14 +74,16 @@ define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
|
|||
|
||||
define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
|
||||
;
|
||||
%v0.0 = extractelement <4 x i32> %v0, i32 0
|
||||
%v0.1 = extractelement <4 x i32> %v0, i32 1
|
||||
|
@ -110,14 +114,16 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
|||
|
||||
define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 1, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
|
||||
;
|
||||
%v0.0 = extractelement <2 x i32> %v0, i32 0
|
||||
|
@ -223,20 +229,22 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
|
|||
|
||||
define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @reduction_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 7, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], <i32 65537, i32 65537, i32 65537, i32 65537>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw <4 x i32> [[TMP9]], <i32 65535, i32 65535, i32 65535, i32 65535>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP10]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP11]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]])
|
||||
; CHECK-NEXT: ret i32 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], <i32 15, i32 15, i32 15, i32 15>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i32> [[TMP10]], <i32 65537, i32 65537, i32 65537, i32 65537>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <4 x i32> [[TMP11]], <i32 65535, i32 65535, i32 65535, i32 65535>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i32> [[TMP13]], [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
|
||||
; CHECK-NEXT: ret i32 [[TMP15]]
|
||||
;
|
||||
%v0.0 = extractelement <4 x i32> %v0, i32 0
|
||||
%v0.1 = extractelement <4 x i32> %v0, i32 1
|
||||
|
|
|
@ -6,14 +6,16 @@ target triple = "aarch64--linux-gnu"
|
|||
|
||||
define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v2i64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 1, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[V0:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[V1:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i64> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP9]]
|
||||
;
|
||||
%v0.0 = extractelement <2 x i64> %v0, i32 0
|
||||
%v0.1 = extractelement <2 x i64> %v0, i32 1
|
||||
|
@ -72,14 +74,16 @@ define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
|
|||
|
||||
define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
|
||||
;
|
||||
%v0.0 = extractelement <4 x i32> %v0, i32 0
|
||||
%v0.1 = extractelement <4 x i32> %v0, i32 1
|
||||
|
@ -110,14 +114,16 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
|||
|
||||
define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
|
||||
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 1, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
|
||||
;
|
||||
%v0.0 = extractelement <2 x i32> %v0, i32 0
|
||||
|
@ -223,20 +229,22 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
|
|||
|
||||
define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
|
||||
; CHECK-LABEL: @reduction_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 7, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], <i32 65537, i32 65537, i32 65537, i32 65537>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw <4 x i32> [[TMP9]], <i32 65535, i32 65535, i32 65535, i32 65535>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP10]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP11]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]])
|
||||
; CHECK-NEXT: ret i32 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[V0]], [[V1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], <i32 15, i32 15, i32 15, i32 15>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i32> [[TMP10]], <i32 65537, i32 65537, i32 65537, i32 65537>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <4 x i32> [[TMP11]], <i32 65535, i32 65535, i32 65535, i32 65535>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i32> [[TMP13]], [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
|
||||
; CHECK-NEXT: ret i32 [[TMP15]]
|
||||
;
|
||||
%v0.0 = extractelement <4 x i32> %v0, i32 0
|
||||
%v0.1 = extractelement <4 x i32> %v0, i32 1
|
||||
|
|
|
@ -338,26 +338,32 @@ define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias noca
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @vec_shuff_reorder() #0 {
|
||||
; CHECK-LABEL: @vec_shuff_reorder(
|
||||
; Dont vectorization of following code for float data type as sub is not commutative-
|
||||
; fc[0] = fb[0]+fa[0];
|
||||
; fc[1] = fa[1]-fb[1];
|
||||
; fc[2] = fa[2]+fb[2];
|
||||
; fc[3] = fb[3]-fa[3];
|
||||
; In the above code we can swap the 1st and 2nd operation as fadd is commutative
|
||||
; but not 2nd or 4th as fsub is not commutative.
|
||||
|
||||
define void @no_vec_shuff_reorder() #0 {
|
||||
; CHECK-LABEL: @no_vec_shuff_reorder(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1) to <2 x float>*), align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1) to <2 x float>*), align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP6]], i32 3
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: store <4 x float> [[TMP17]], <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: store float [[TMP3]], float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fsub float [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: store float [[TMP6]], float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
|
||||
; CHECK-NEXT: store float [[TMP9]], float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fsub float [[TMP10]], [[TMP11]]
|
||||
; CHECK-NEXT: store float [[TMP12]], float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
|
||||
|
|
|
@ -61,12 +61,12 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
|
|||
; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; AVX-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
|
||||
; AVX-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
|
||||
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
|
||||
; AVX-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]]
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
; AVX-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[SHUFFLE]], [[TMP3]]
|
||||
; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
|
||||
; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]]
|
||||
; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[TMP4]]
|
||||
; AVX-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP9:%.*]] = fcmp olt <2 x float> [[TMP8]], <float -1.000000e+00, float -1.000000e+00>
|
||||
|
|
|
@ -30,11 +30,11 @@ define void @fextr1(double* %ptr) {
|
|||
; CHECK-LABEL: @fextr1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef, align 16
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], <double 1.200000e+00, double 3.400000e+00>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 3.400000e+00, double 1.200000e+00>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -8,12 +8,12 @@ define i32 @fn1() {
|
|||
; CHECK-LABEL: @fn1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, i32 [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
|
||||
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32), i32 8>, i32 [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -11,21 +11,21 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn
|
|||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
|
||||
; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
|
||||
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
|
||||
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
|
||||
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[GEP_5]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP2]], [[SHUFFLE]]
|
||||
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
|
||||
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
|
||||
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
|
||||
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
|
||||
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
|
||||
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: ret i32 undef
|
||||
;
|
||||
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
|
||||
|
@ -69,22 +69,22 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
|
|||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP10]]
|
||||
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
|
||||
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
|
||||
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
|
||||
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4
|
||||
; CHECK-NEXT: ret i32 undef
|
||||
;
|
||||
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
|
||||
|
|
|
@ -26,31 +26,32 @@ define dso_local void @j() local_unnamed_addr {
|
|||
; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x float>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], <float 1.000000e+01, float 1.000000e+01>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP7]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 1
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: store float [[TMP9]], float* @g, align 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[SHUFFLE]], <float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 2
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[SHUFFLE]], <float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 3
|
||||
; CHECK-NEXT: store float [[TMP11]], float* @c, align 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP10]], i32 2
|
||||
; CHECK-NEXT: store float [[TMP12]], float* @d, align 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP10]], i32 3
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP10]], i32 1
|
||||
; CHECK-NEXT: store float [[TMP13]], float* @e, align 4
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP10]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP10]], i32 0
|
||||
; CHECK-NEXT: store float [[TMP14]], float* @f, align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 14
|
||||
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 15
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* @a, align 4
|
||||
; CHECK-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP15]] to float
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float poison, float -1.000000e+00, float poison, float -1.000000e+00>, float [[CONV19]], i32 0
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP10]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP10]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float -1.000000e+00, float -1.000000e+00, float poison, float poison>, float [[CONV19]], i32 2
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 2
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 3
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP10]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = fsub <4 x float> [[TMP10]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32>
|
||||
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
|
||||
; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP23]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -15,8 +15,8 @@ define void @hoge() {
|
|||
; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
|
||||
; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 undef, i32 15, i32 31, i32 47>
|
||||
; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 15, i32 31, i32 47, i32 poison>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
|
||||
; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef
|
||||
; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63
|
||||
|
|
|
@ -7,15 +7,15 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
|
|||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A7:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A8:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A1:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A2:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A3:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A4:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A5:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A6:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
|
@ -57,15 +57,15 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
|
|||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A6:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A4:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A5:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A8:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A2:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
|
@ -111,15 +111,15 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
|
|||
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 3, i32 2, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A4:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A6:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A5:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A8:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A2:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A7:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A1:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
|
|
Loading…
Reference in New Issue