Precommit transform tests that have poison as insertelement's placeholder

This commit copies existing tests at llvm/test/Transforms and replaces
'insertelement undef' in those files with 'insertelement poison'.
(see https://reviews.llvm.org/D93586)

Tests listed using this script:

grep -R -E '^[^;]*insertelement <.*> undef,' . | cut -d":" -f1 | uniq |
wc -l

Tests updated using this script:

file_org=llvm/test/Transforms/$1
file=${file_org%.ll}-inseltpoison.ll
cp $file_org $file
sed -i -E 's/^([^;]*)insertelement <(.*)> undef/\1insertelement <\2> poison/g' $file
head -1 $file | grep "Assertions have been autogenerated by utils/update_test_checks.py" -q
if [ "$?" == 1 ]; then
  echo "$file : should be manually updated"
  # I manually updated the script
  exit 1
fi
python3 ./llvm/utils/update_test_checks.py --opt-binary=./build-releaseassert/bin/opt $file
This commit is contained in:
Juneyoung Lee 2020-12-24 11:41:27 +09:00
parent 48ad8194a5
commit db7a2f347f
120 changed files with 44309 additions and 0 deletions

View File

@ -0,0 +1,847 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
; Determine dereference-ability before unused loads get deleted:
; https://bugs.llvm.org/show_bug.cgi?id=21780
;
; %ptr is loaded at double offsets 0 through 3 (the non-zero offsets through
; inbounds GEPs). The expected argument attributes therefore include nonnull,
; align 8 and dereferenceable(32), i.e. four 8-byte elements. The returned
; vector is built by an insertelement chain whose initial placeholder operand
; is poison.
define <4 x double> @PR21780(double* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@PR21780
; IS__TUNIT____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0:#.*]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1
; IS__TUNIT____-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 2
; IS__TUNIT____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3
; IS__TUNIT____-NEXT: [[T0:%.*]] = load double, double* [[PTR]], align 8
; IS__TUNIT____-NEXT: [[T1:%.*]] = load double, double* [[ARRAYIDX1]], align 8
; IS__TUNIT____-NEXT: [[T2:%.*]] = load double, double* [[ARRAYIDX2]], align 8
; IS__TUNIT____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__TUNIT____-NEXT: [[VECINIT0:%.*]] = insertelement <4 x double> poison, double [[T0]], i32 0
; IS__TUNIT____-NEXT: [[VECINIT1:%.*]] = insertelement <4 x double> [[VECINIT0]], double [[T1]], i32 1
; IS__TUNIT____-NEXT: [[VECINIT2:%.*]] = insertelement <4 x double> [[VECINIT1]], double [[T2]], i32 2
; IS__TUNIT____-NEXT: [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT2]], double [[T3]], i32 3
; IS__TUNIT____-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[VECINIT3]], <4 x double> [[VECINIT3]], <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; IS__TUNIT____-NEXT: ret <4 x double> [[SHUFFLE]]
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@PR21780
; IS__CGSCC____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0:#.*]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1
; IS__CGSCC____-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 2
; IS__CGSCC____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3
; IS__CGSCC____-NEXT: [[T0:%.*]] = load double, double* [[PTR]], align 8
; IS__CGSCC____-NEXT: [[T1:%.*]] = load double, double* [[ARRAYIDX1]], align 8
; IS__CGSCC____-NEXT: [[T2:%.*]] = load double, double* [[ARRAYIDX2]], align 8
; IS__CGSCC____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__CGSCC____-NEXT: [[VECINIT0:%.*]] = insertelement <4 x double> poison, double [[T0]], i32 0
; IS__CGSCC____-NEXT: [[VECINIT1:%.*]] = insertelement <4 x double> [[VECINIT0]], double [[T1]], i32 1
; IS__CGSCC____-NEXT: [[VECINIT2:%.*]] = insertelement <4 x double> [[VECINIT1]], double [[T2]], i32 2
; IS__CGSCC____-NEXT: [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT2]], double [[T3]], i32 3
; IS__CGSCC____-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[VECINIT3]], <4 x double> [[VECINIT3]], <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; IS__CGSCC____-NEXT: ret <4 x double> [[SHUFFLE]]
;
; GEP of index 0 is simplified away.
%arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1
%arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
%arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3
%t0 = load double, double* %ptr, align 8
%t1 = load double, double* %arrayidx1, align 8
%t2 = load double, double* %arrayidx2, align 8
%t3 = load double, double* %arrayidx3, align 8
; The first insertelement starts from a poison vector; every lane is then
; overwritten by one of the four loaded values.
%vecinit0 = insertelement <4 x double> poison, double %t0, i32 0
%vecinit1 = insertelement <4 x double> %vecinit0, double %t1, i32 1
%vecinit2 = insertelement <4 x double> %vecinit1, double %t2, i32 2
%vecinit3 = insertelement <4 x double> %vecinit2, double %t3, i32 3
%shuffle = shufflevector <4 x double> %vecinit3, <4 x double> %vecinit3, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %shuffle
}
; Only element 3 is loaded, but its address is formed with an inbounds GEP.
; The expected argument attributes still include nonnull and
; dereferenceable(32).
define double @PR21780_only_access3_with_inbounds(double* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds
; IS__TUNIT____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3
; IS__TUNIT____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__TUNIT____-NEXT: ret double [[T3]]
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds
; IS__CGSCC____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3
; IS__CGSCC____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__CGSCC____-NEXT: ret double [[T3]]
;
%arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Same single access to element 3, but the GEP is not inbounds. The expected
; argument attributes carry neither nonnull nor dereferenceable; only
; nocapture, nofree, readonly and align 8 are expected on %ptr.
define double @PR21780_only_access3_without_inbounds(double* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds
; IS__TUNIT____-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) [[ATTR0]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3
; IS__TUNIT____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__TUNIT____-NEXT: ret double [[T3]]
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds
; IS__CGSCC____-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) [[ATTR0]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3
; IS__CGSCC____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__CGSCC____-NEXT: ret double [[T3]]
;
%arrayidx3 = getelementptr double, double* %ptr, i64 3
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Elements 0 through 3 are all loaded through non-inbounds GEPs, but only %t3
; is returned. The expected argument attributes include nonnull and
; dereferenceable(32), while the expected body keeps only the GEP and load
; for element 3 (the three unused loads are gone).
define double @PR21780_without_inbounds(double* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@PR21780_without_inbounds
; IS__TUNIT____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3
; IS__TUNIT____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__TUNIT____-NEXT: ret double [[T3]]
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@PR21780_without_inbounds
; IS__CGSCC____-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) [[ATTR0]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3
; IS__CGSCC____-NEXT: [[T3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
; IS__CGSCC____-NEXT: ret double [[T3]]
;
%arrayidx1 = getelementptr double, double* %ptr, i64 1
%arrayidx2 = getelementptr double, double* %ptr, i64 2
%arrayidx3 = getelementptr double, double* %ptr, i64 3
%t0 = load double, double* %ptr, align 8
%t1 = load double, double* %arrayidx1, align 8
%t2 = load double, double* %arrayidx2, align 8
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Unsimplified, but still valid. Also, throw in some bogus arguments.
;
; %unused is never referenced, %other receives a single i8 store, and %ptr is
; read at i8 offsets 0, 1 and 2. Expected argument attributes: readnone on
; %unused, writeonly dereferenceable(1) on %other, and readonly
; dereferenceable(3) on %ptr. Only the load that feeds the store remains in
; the expected body.
define void @gep0(i8* %unused, i8* %other, i8* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@gep0
; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) [[ATTR1:#.*]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2
; IS__TUNIT____-NEXT: [[T2:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
; IS__TUNIT____-NEXT: store i8 [[T2]], i8* [[OTHER]], align 1
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@gep0
; IS__CGSCC____-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) [[ATTR1:#.*]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2
; IS__CGSCC____-NEXT: [[T2:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
; IS__CGSCC____-NEXT: store i8 [[T2]], i8* [[OTHER]], align 1
; IS__CGSCC____-NEXT: ret void
;
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
store i8 %t2, i8* %other
ret void
}
; Order of accesses does not change computation.
; Multiple arguments may be dereferenceable.
;
; %ptr1 is read at i8 offsets 0, 1 and 2, and %ptr2 at i32 offsets 0 and 1,
; with the GEPs and loads of both pointers interleaved. Expected argument
; attributes: dereferenceable(3) on %ptr1, align 4 dereferenceable(8) on
; %ptr2. None of the loaded values is used, and the expected body is a bare
; ret void.
define void @ordering(i8* %ptr1, i32* %ptr2) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@ordering
; IS__TUNIT____-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) [[ATTR2:#.*]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@ordering
; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) [[ATTR2:#.*]] {
; IS__CGSCC____-NEXT: ret void
;
%a20 = getelementptr i32, i32* %ptr2, i64 0
%a12 = getelementptr i8, i8* %ptr1, i64 2
%t12 = load i8, i8* %a12
%a11 = getelementptr i8, i8* %ptr1, i64 1
%t20 = load i32, i32* %a20
%a10 = getelementptr i8, i8* %ptr1, i64 0
%t10 = load i8, i8* %a10
%t11 = load i8, i8* %a11
%a21 = getelementptr i32, i32* %ptr2, i64 1
%t21 = load i32, i32* %a21
ret void
}
; Not in entry block.
;
; The three i8 loads sit in %exit, which is reached from %entry through an
; unconditional branch. The expected argument attributes include nonnull and
; dereferenceable(3).
define void @not_entry_but_guaranteed_to_execute(i8* %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute
; IS__TUNIT____-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: br label [[EXIT:%.*]]
; IS__TUNIT____: exit:
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute
; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: entry:
; IS__CGSCC____-NEXT: br label [[EXIT:%.*]]
; IS__CGSCC____: exit:
; IS__CGSCC____-NEXT: ret void
;
entry:
br label %exit
exit:
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
}
; Not in entry block and not guaranteed to execute.
;
; The loads live in %loads, which is only entered when %cond is true. The
; expected argument attributes on %ptr carry neither nonnull nor
; dereferenceable.
define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute
; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: br i1 [[COND]], label [[LOADS:%.*]], label [[EXIT:%.*]]
; IS__TUNIT____: loads:
; IS__TUNIT____-NEXT: ret void
; IS__TUNIT____: exit:
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute
; IS__CGSCC____-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: entry:
; IS__CGSCC____-NEXT: br i1 [[COND]], label [[LOADS:%.*]], label [[EXIT:%.*]]
; IS__CGSCC____: loads:
; IS__CGSCC____-NEXT: ret void
; IS__CGSCC____: exit:
; IS__CGSCC____-NEXT: ret void
;
entry:
br i1 %cond, label %loads, label %exit
loads:
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
exit:
ret void
}
; The last load may not execute, so dereferenceable bytes only covers the 1st two loads.
;
; Elements 0 and 1 (i16) are loaded in %entry; element 2 is loaded only in
; %loads, behind the conditional branch on %cond. The expected argument
; attributes are align 2 and dereferenceable(4).
define void @partial_in_entry(i16* %ptr, i1 %cond) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@partial_in_entry
; IS__TUNIT____-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: entry:
; IS__TUNIT____-NEXT: br i1 [[COND]], label [[LOADS:%.*]], label [[EXIT:%.*]]
; IS__TUNIT____: loads:
; IS__TUNIT____-NEXT: ret void
; IS__TUNIT____: exit:
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@partial_in_entry
; IS__CGSCC____-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: entry:
; IS__CGSCC____-NEXT: br i1 [[COND]], label [[LOADS:%.*]], label [[EXIT:%.*]]
; IS__CGSCC____: loads:
; IS__CGSCC____-NEXT: ret void
; IS__CGSCC____: exit:
; IS__CGSCC____-NEXT: ret void
;
entry:
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load i16, i16* %arrayidx0
%t1 = load i16, i16* %arrayidx1
br i1 %cond, label %loads, label %exit
loads:
%t2 = load i16, i16* %arrayidx2
ret void
exit:
ret void
}
; The volatile load can't be used to prove a non-volatile access is allowed.
; The 2nd and 3rd loads may never execute.
;
; Element 0 is read with a volatile load, elements 1 and 2 with plain loads.
; The expected argument attributes on %ptr are only nofree and align 2 (no
; nonnull, dereferenceable or nocapture), and the volatile load is the only
; access kept in the expected body.
define void @volatile_is_not_dereferenceable(i16* %ptr) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nounwind willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
; IS__TUNIT____-SAME: (i16* nofree align 2 [[PTR:%.*]]) [[ATTR3:#.*]] {
; IS__TUNIT____-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
; IS__TUNIT____-NEXT: [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nounwind willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
; IS__CGSCC____-SAME: (i16* nofree align 2 [[PTR:%.*]]) [[ATTR3:#.*]] {
; IS__CGSCC____-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
; IS__CGSCC____-NEXT: [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
; IS__CGSCC____-NEXT: ret void
;
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load volatile i16, i16* %arrayidx0
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; TODO: We should allow inference for atomic (but not volatile) ops.
;
; Element 0 is read with an unordered atomic load, elements 1 and 2 with
; plain loads. The expected argument attributes are nonnull, align 2 and
; dereferenceable(6), i.e. all three i16 elements.
; NOTE(review): since the expected attributes already cover the atomic
; access, the TODO above looks stale for this pass - confirm before removing.
define void @atomic_is_alright(i16* %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@atomic_is_alright
; IS__TUNIT____-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@atomic_is_alright
; IS__CGSCC____-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load atomic i16, i16* %arrayidx0 unordered, align 2
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; External function declared without any attributes.
; NOTE(review): judging by its name it models a callee that might not return
; to its caller - confirm against the test this file was copied from.
declare void @may_not_return()
; Element 0 is loaded before the call to @may_not_return; elements 1 and 2
; are loaded after it. The expected argument attributes are align 2 and
; dereferenceable(2), i.e. only the first i16, and the call is kept in the
; expected body. All four run configurations share one set of expectations
; for this function.
define void @not_guaranteed_to_transfer_execution(i16* %ptr) {
; CHECK-LABEL: define {{[^@]+}}@not_guaranteed_to_transfer_execution
; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) {
; CHECK-NEXT: call void @may_not_return()
; CHECK-NEXT: ret void
;
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load i16, i16* %arrayidx0
call void @may_not_return()
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; We must have consecutive accesses.
;
; %ptr is loaded directly, at a run-time index (%variable_index), and at
; constant offset 2; offset 1 is never accessed with a constant index. The
; expected argument attributes on %ptr are nonnull and dereferenceable(1),
; and %unused is expected to be readnone.
define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@variable_gep_index
; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@variable_gep_index
; IS__CGSCC____-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %ptr
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
}
; Deal with >1 GEP index.
;
; A two-index GEP (0, 0) into a <4 x i8> pointer yields an i8 pointer, from
; which a single i8 is loaded. The expected argument attributes currently
; record dereferenceable(1); the FIXME inside the function states the value
; the authors would prefer.
define void @multi_index_gep(<4 x i8>* %ptr) {
; FIXME: %ptr should be dereferenceable(4)
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@multi_index_gep
; IS__TUNIT____-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@multi_index_gep
; IS__CGSCC____-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0
%t0 = load i8, i8* %arrayidx00
ret void
}
; Could round weird bitwidths down?
;
; A single i9 value (not a whole number of bytes) is loaded from %ptr. The
; expected argument attributes are nonnull, align 2 and dereferenceable(2).
define void @not_byte_multiple(i9* %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@not_byte_multiple
; IS__TUNIT____-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@not_byte_multiple
; IS__CGSCC____-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx0 = getelementptr i9, i9* %ptr, i64 0
%t0 = load i9, i9* %arrayidx0
ret void
}
; Missing direct access from the pointer.
;
; Only elements 1 and 2 are loaded; element 0 (the pointer itself) is never
; accessed. The expected argument attributes carry neither nonnull nor
; dereferenceable, though align 2 is still expected.
define void @no_pointer_deref(i16* %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@no_pointer_deref
; IS__TUNIT____-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@no_pointer_deref
; IS__CGSCC____-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; Out-of-order is ok, but missing access concludes dereferenceable range.
;
; i32 elements 1, 0 and 3 are loaded (in that order); element 2 is never
; accessed. The expected argument attributes are align 4 and
; dereferenceable(8), i.e. elements 0 and 1 only.
define void @non_consecutive(i32* %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@non_consecutive
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@non_consecutive
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%t1 = load i32, i32* %arrayidx1
%t0 = load i32, i32* %arrayidx0
%t3 = load i32, i32* %arrayidx3
ret void
}
; Improve on existing dereferenceable attribute.
;
; The argument arrives with dereferenceable(8), and i32 elements 0 through 3
; are all loaded. The expected argument attributes raise this to
; dereferenceable(16).
define void @more_bytes(i32* dereferenceable(8) %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@more_bytes
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@more_bytes
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; Improve on existing dereferenceable_or_null attribute.
;
; The argument arrives with dereferenceable_or_null(8), and i32 elements 0
; through 3 are all loaded. The expected argument attributes become nonnull
; and dereferenceable(16).
define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@more_bytes_and_not_null
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@more_bytes_and_not_null
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; But don't pessimize existing dereferenceable attribute.
;
; The argument arrives with dereferenceable(100), while the loads only touch
; i32 elements 0 through 3 (16 bytes). The expected argument attributes keep
; dereferenceable(100).
define void @better_bytes(i32* dereferenceable(100) %ptr) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@better_bytes
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@better_bytes
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; The i32 argument is bitcast to float* and float elements 0 and 1 are loaded
; through the cast pointer. The expected attributes on the original argument
; are nonnull, align 4 and dereferenceable(8).
define void @bitcast(i32* %arg) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@bitcast
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@bitcast
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
%t0 = load float, float* %arrayidx0
%t1 = load float, float* %arrayidx1
ret void
}
; Both arguments are accessed through a bitcast to a type of a different
; size than their declared pointee:
;   - %arg1 (double*) is cast to float* and float elements 0-2 are loaded;
;     expected: align 4 dereferenceable(12).
;   - %arg2 (i8*) is cast to i64* and i64 elements 0-1 are loaded;
;     expected: align 4 dereferenceable(16).
define void @bitcast_different_sizes(double* %arg1, i8* %arg2) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@bitcast_different_sizes
; IS__TUNIT____-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@bitcast_different_sizes
; IS__CGSCC____-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%ptr1 = bitcast double* %arg1 to float*
%a10 = getelementptr float, float* %ptr1, i64 0
%a11 = getelementptr float, float* %ptr1, i64 1
%a12 = getelementptr float, float* %ptr1, i64 2
%ld10 = load float, float* %a10
%ld11 = load float, float* %a11
%ld12 = load float, float* %a12
%ptr2 = bitcast i8* %arg2 to i64*
%a20 = getelementptr i64, i64* %ptr2, i64 0
%a21 = getelementptr i64, i64* %ptr2, i64 1
%ld20 = load i64, i64* %a20
%ld21 = load i64, i64* %a21
ret void
}
; Float elements 0 and -1 are loaded through a bitcast of %arg. The expected
; argument attributes are align 4 and dereferenceable(4), i.e. only the
; element at offset 0; the access at the negative offset adds nothing.
define void @negative_offset(i32* %arg) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@negative_offset
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR2]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@negative_offset
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR2]] {
; IS__CGSCC____-NEXT: ret void
;
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 -1
%t0 = load float, float* %arrayidx0
%t1 = load float, float* %arrayidx1
ret void
}
; Stores instead of loads: float elements 0 and 1 are written through a
; bitcast of %arg. The expected argument attributes are writeonly, nonnull,
; align 4 and dereferenceable(8), and both stores remain in the expected
; body.
define void @stores(i32* %arg) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT____-LABEL: define {{[^@]+}}@stores
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR4:#.*]] {
; IS__TUNIT____-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float*
; IS__TUNIT____-NEXT: [[ARRAYIDX0:%.*]] = getelementptr float, float* [[PTR]], i64 0
; IS__TUNIT____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr float, float* [[PTR]], i64 1
; IS__TUNIT____-NEXT: store float 1.000000e+00, float* [[ARRAYIDX0]], align 4
; IS__TUNIT____-NEXT: store float 2.000000e+00, float* [[ARRAYIDX1]], align 4
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@stores
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR4:#.*]] {
; IS__CGSCC____-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float*
; IS__CGSCC____-NEXT: [[ARRAYIDX0:%.*]] = getelementptr float, float* [[PTR]], i64 0
; IS__CGSCC____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr float, float* [[PTR]], i64 1
; IS__CGSCC____-NEXT: store float 1.000000e+00, float* [[ARRAYIDX0]], align 4
; IS__CGSCC____-NEXT: store float 2.000000e+00, float* [[ARRAYIDX1]], align 4
; IS__CGSCC____-NEXT: ret void
;
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
store float 1.0, float* %arrayidx0
store float 2.0, float* %arrayidx1
ret void
}
; Mixed access kinds: float element 0 is loaded (result unused) and float
; element 1 is stored. The expected argument attributes are writeonly,
; nonnull, align 4 and dereferenceable(8); the expected body keeps the store
; and its GEP, while the unused load and its GEP are gone.
define void @load_store(i32* %arg) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT____-LABEL: define {{[^@]+}}@load_store
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__TUNIT____-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float*
; IS__TUNIT____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr float, float* [[PTR]], i64 1
; IS__TUNIT____-NEXT: store float 2.000000e+00, float* [[ARRAYIDX1]], align 4
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@load_store
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__CGSCC____-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float*
; IS__CGSCC____-NEXT: [[ARRAYIDX1:%.*]] = getelementptr float, float* [[PTR]], i64 1
; IS__CGSCC____-NEXT: store float 2.000000e+00, float* [[ARRAYIDX1]], align 4
; IS__CGSCC____-NEXT: ret void
;
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
%t1 = load float, float* %arrayidx0
store float 2.0, float* %arrayidx1
ret void
}
; Stores a double and then an i32 through the same pointer (the double store
; goes through a bitcast of %arg). The expected argument attributes are
; align 8 dereferenceable(8), and both stores are expected to carry align 8.
define void @different_size1(i32* %arg) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT____-LABEL: define {{[^@]+}}@different_size1
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__TUNIT____-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double*
; IS__TUNIT____-NEXT: store double 0.000000e+00, double* [[ARG_CAST]], align 8
; IS__TUNIT____-NEXT: store i32 0, i32* [[ARG]], align 8
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@different_size1
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__CGSCC____-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double*
; IS__CGSCC____-NEXT: store double 0.000000e+00, double* [[ARG_CAST]], align 8
; IS__CGSCC____-NEXT: store i32 0, i32* [[ARG]], align 8
; IS__CGSCC____-NEXT: ret void
;
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
store i32 0, i32* %arg
ret void
}
; Same two stores as @different_size1 but in the opposite order: the i32 store
; comes first, then the double store through the bitcast. The expected argument
; attributes are again align 8 dereferenceable(8).
define void @different_size2(i32* %arg) {
; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT____-LABEL: define {{[^@]+}}@different_size2
; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__TUNIT____-NEXT: store i32 0, i32* [[ARG]], align 8
; IS__TUNIT____-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double*
; IS__TUNIT____-NEXT: store double 0.000000e+00, double* [[ARG_CAST]], align 8
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@different_size2
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) [[ATTR4]] {
; IS__CGSCC____-NEXT: store i32 0, i32* [[ARG]], align 8
; IS__CGSCC____-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double*
; IS__CGSCC____-NEXT: store double 0.000000e+00, double* [[ARG_CAST]], align 8
; IS__CGSCC____-NEXT: ret void
;
store i32 0, i32* %arg
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
ret void
}
; Make use of MustBeExecuted Explorer
;
; [CFG]
;        entry
;        /   \
;      l1     l2
;      |   X   |
;      l3     l4
;        \   /
;          l5
;        /   \
;      l6     l7
;        \   /
;         end
; According to the above CFG, the instructions in block l5 must be executed on
; every path, and both successors of l5 (l6 and l7) store to %p.
; Therefore, %p must be dereferenced.
;
; In the check blocks below, the new-pass-manager prefixes expect %p to be
; nonnull align 4 dereferenceable(4) and the branch in l1 to be folded into an
; unconditional branch to l4; the old-pass-manager prefixes expect neither.
;
; NOTE(review): the prefix used on the next line is not listed in the RUN lines
; at the top of this file, so that line looks like a stale, never-evaluated
; check - confirm before relying on it.
; ATTRIBUTOR_CGSCC_NPM-LABEL: define i32 @require_cfg_analysis(i32 %c, i32* {{.*}} dereferenceable(4) %p)
define i32 @require_cfg_analysis(i32 %c, i32* %p) {
; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@require_cfg_analysis
; IS__TUNIT_OPM-SAME: (i32 [[C:%.*]], i32* nocapture nofree writeonly [[P:%.*]]) [[ATTR4:#.*]] {
; IS__TUNIT_OPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0
; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL1]], label [[L1:%.*]], label [[L2:%.*]]
; IS__TUNIT_OPM: l1:
; IS__TUNIT_OPM-NEXT: [[TOBOOL2:%.*]] = icmp eq i32 [[C]], 1
; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL2]], label [[L3:%.*]], label [[L4:%.*]]
; IS__TUNIT_OPM: l2:
; IS__TUNIT_OPM-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[C]], 2
; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL3]], label [[L3]], label [[L4]]
; IS__TUNIT_OPM: l3:
; IS__TUNIT_OPM-NEXT: br label [[L5:%.*]]
; IS__TUNIT_OPM: l4:
; IS__TUNIT_OPM-NEXT: br label [[L5]]
; IS__TUNIT_OPM: l5:
; IS__TUNIT_OPM-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 4
; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL4]], label [[L6:%.*]], label [[L7:%.*]]
; IS__TUNIT_OPM: l6:
; IS__TUNIT_OPM-NEXT: store i32 0, i32* [[P]], align 4
; IS__TUNIT_OPM-NEXT: br label [[END:%.*]]
; IS__TUNIT_OPM: l7:
; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[P]], align 4
; IS__TUNIT_OPM-NEXT: br label [[END]]
; IS__TUNIT_OPM: end:
; IS__TUNIT_OPM-NEXT: ret i32 1
;
; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@require_cfg_analysis
; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR4:#.*]] {
; IS__TUNIT_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0
; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL1]], label [[L1:%.*]], label [[L2:%.*]]
; IS__TUNIT_NPM: l1:
; IS__TUNIT_NPM-NEXT: br label [[L4:%.*]]
; IS__TUNIT_NPM: l2:
; IS__TUNIT_NPM-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[C]], 2
; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL3]], label [[L3:%.*]], label [[L4]]
; IS__TUNIT_NPM: l3:
; IS__TUNIT_NPM-NEXT: br label [[L5:%.*]]
; IS__TUNIT_NPM: l4:
; IS__TUNIT_NPM-NEXT: br label [[L5]]
; IS__TUNIT_NPM: l5:
; IS__TUNIT_NPM-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 4
; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL4]], label [[L6:%.*]], label [[L7:%.*]]
; IS__TUNIT_NPM: l6:
; IS__TUNIT_NPM-NEXT: store i32 0, i32* [[P]], align 4
; IS__TUNIT_NPM-NEXT: br label [[END:%.*]]
; IS__TUNIT_NPM: l7:
; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[P]], align 4
; IS__TUNIT_NPM-NEXT: br label [[END]]
; IS__TUNIT_NPM: end:
; IS__TUNIT_NPM-NEXT: ret i32 1
;
; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@require_cfg_analysis
; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]], i32* nocapture nofree writeonly [[P:%.*]]) [[ATTR4:#.*]] {
; IS__CGSCC_OPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0
; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL1]], label [[L1:%.*]], label [[L2:%.*]]
; IS__CGSCC_OPM: l1:
; IS__CGSCC_OPM-NEXT: [[TOBOOL2:%.*]] = icmp eq i32 [[C]], 1
; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL2]], label [[L3:%.*]], label [[L4:%.*]]
; IS__CGSCC_OPM: l2:
; IS__CGSCC_OPM-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[C]], 2
; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL3]], label [[L3]], label [[L4]]
; IS__CGSCC_OPM: l3:
; IS__CGSCC_OPM-NEXT: br label [[L5:%.*]]
; IS__CGSCC_OPM: l4:
; IS__CGSCC_OPM-NEXT: br label [[L5]]
; IS__CGSCC_OPM: l5:
; IS__CGSCC_OPM-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 4
; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL4]], label [[L6:%.*]], label [[L7:%.*]]
; IS__CGSCC_OPM: l6:
; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[P]], align 4
; IS__CGSCC_OPM-NEXT: br label [[END:%.*]]
; IS__CGSCC_OPM: l7:
; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[P]], align 4
; IS__CGSCC_OPM-NEXT: br label [[END]]
; IS__CGSCC_OPM: end:
; IS__CGSCC_OPM-NEXT: ret i32 1
;
; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@require_cfg_analysis
; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR4:#.*]] {
; IS__CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0
; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[L1:%.*]], label [[L2:%.*]]
; IS__CGSCC_NPM: l1:
; IS__CGSCC_NPM-NEXT: br label [[L4:%.*]]
; IS__CGSCC_NPM: l2:
; IS__CGSCC_NPM-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[C]], 2
; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL3]], label [[L3:%.*]], label [[L4]]
; IS__CGSCC_NPM: l3:
; IS__CGSCC_NPM-NEXT: br label [[L5:%.*]]
; IS__CGSCC_NPM: l4:
; IS__CGSCC_NPM-NEXT: br label [[L5]]
; IS__CGSCC_NPM: l5:
; IS__CGSCC_NPM-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 4
; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL4]], label [[L6:%.*]], label [[L7:%.*]]
; IS__CGSCC_NPM: l6:
; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[P]], align 4
; IS__CGSCC_NPM-NEXT: br label [[END:%.*]]
; IS__CGSCC_NPM: l7:
; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[P]], align 4
; IS__CGSCC_NPM-NEXT: br label [[END]]
; IS__CGSCC_NPM: end:
; IS__CGSCC_NPM-NEXT: ret i32 1
;
%tobool1 = icmp eq i32 %c, 0
br i1 %tobool1, label %l1, label %l2
l1:
%tobool2 = icmp eq i32 %c, 1
br i1 %tobool2, label %l3, label %l4
l2:
%tobool3 = icmp eq i32 %c, 2
br i1 %tobool3, label %l3, label %l4
l3:
br label %l5
l4:
br label %l5
l5:
%tobool4 = icmp eq i32 %c, 4
br i1 %tobool4, label %l6, label %l7
l6:
store i32 0, i32* %p
br label %end
l7:
store i32 1, i32* %p
br label %end
end:
ret i32 1
}

View File

@ -0,0 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -bdce < %s | FileCheck %s
; BDCE applied to integer vectors.
; The %a chain is masked with 4 (bit 2) and the %b chain with 8 (bit 3); the
; final ashr by 3 shifts bit 2 out. The expected output removes the add feeding
; %a3 and replaces its operand with zeroinitializer, while the %b chain is kept.
define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @test_basic(
; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], <i32 3, i32 3>
; CHECK-NEXT: ret <2 x i32> [[D]]
;
%a2 = add <2 x i32> %a, <i32 1, i32 1>
%a3 = and <2 x i32> %a2, <i32 4, i32 4>
%b2 = add <2 x i32> %b, <i32 1, i32 1>
%b3 = and <2 x i32> %b2, <i32 8, i32 8>
%c = or <2 x i32> %a3, %b3
%d = ashr <2 x i32> %c, <i32 3, i32 3>
ret <2 x i32> %d
}
; Going vector -> scalar
; Same masks as @test_basic, but lane 0 is extracted before a scalar ashr by 3.
; The expected output again feeds zeroinitializer into the mask-4 'and' and
; drops the add on %a.
define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @test_extractelement(
; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0
; CHECK-NEXT: [[E:%.*]] = ashr i32 [[D]], 3
; CHECK-NEXT: ret i32 [[E]]
;
%a2 = add <2 x i32> %a, <i32 1, i32 1>
%a3 = and <2 x i32> %a2, <i32 4, i32 4>
%b2 = add <2 x i32> %b, <i32 1, i32 1>
%b3 = and <2 x i32> %b2, <i32 8, i32 8>
%c = or <2 x i32> %a3, %b3
%d = extractelement <2 x i32> %c, i32 0
%e = ashr i32 %d, 3
ret i32 %e
}
; Going scalar -> vector
; Two vectors are built from the scalars with insertelement (poison as the
; placeholder). The %x vector is masked with 4 and the %y vector with 8 before
; the ashr by 3. The expected output drops both %x insertelements (the mask-4
; 'and' takes zeroinitializer) and keeps the %y insertelements.
define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
; CHECK-LABEL: @test_insertelement(
; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
; CHECK-NEXT: [[Y:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1
; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], <i32 8, i32 8>
; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]]
; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
; CHECK-NEXT: ret <2 x i32> [[U]]
;
%x = insertelement <2 x i32> poison, i32 %a, i32 0
%x2 = insertelement <2 x i32> %x, i32 %b, i32 1
%x3 = and <2 x i32> %x2, <i32 4, i32 4>
%y = insertelement <2 x i32> poison, i32 %b, i32 0
%y2 = insertelement <2 x i32> %y, i32 %a, i32 1
%y3 = and <2 x i32> %y2, <i32 8, i32 8>
%z = or <2 x i32> %x3, %y3
%u = ashr <2 x i32> %z, <i32 3, i32 3>
ret <2 x i32> %u
}
; Some non-int vectors and conversions
; The masked integer value goes through uitofp, fadd and fptoui before the
; ashr. The expected output is the input unchanged: every instruction is kept.
define <2 x i32> @test_conversion(<2 x i32> %a) {
; CHECK-LABEL: @test_conversion(
; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], <i32 2, i32 2>
; CHECK-NEXT: [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double>
; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], <double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32>
; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
; CHECK-NEXT: ret <2 x i32> [[U]]
;
%a2 = add <2 x i32> %a, <i32 1, i32 1>
%a3 = and <2 x i32> %a2, <i32 2, i32 2>
%x = uitofp <2 x i32> %a3 to <2 x double>
%y = fadd <2 x double> %x, <double 1.0, double 1.0>
%z = fptoui <2 x double> %y to <2 x i32>
%u = ashr <2 x i32> %z, <i32 3, i32 3>
ret <2 x i32> %u
}
; Assumption invalidation (adapted from invalidate-assumptions.ll)
; Only the low bit of %sub survives the trunc to <2 x i1>. The expected output
; removes the 'or' that sets bit 6, feeds zeroinitializer into the shl, and no
; longer carries the nuw flag on the sub.
define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) {
; CHECK-LABEL: @test_assumption_invalidation(
; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, <i8 1, i8 1>
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]]
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[TRUNC]]
;
%setbit = or <2 x i8> %x, <i8 64, i8 64>
%little_number = zext <2 x i1> %b to <2 x i8>
%big_number = shl <2 x i8> %setbit, <i8 1, i8 1>
%sub = sub nuw <2 x i8> %big_number, %little_number
%trunc = trunc <2 x i8> %sub to <2 x i1>
ret <2 x i1> %trunc
}

View File

@ -0,0 +1,113 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Two-field struct; field 1 is the element addressed by the struct-based tests.
%struct.a = type { i32, i32 }
; External instance of %struct.a, used as the splatted base in @global_struct_splat.
@c = external dso_local global %struct.a, align 4
; Constant 16-element i32 table, indexed with a scalable vector in @test_global_array.
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
; The scalar %base is splatted (insertelement into poison + shufflevector) and
; indexed with a vector of offsets. The expected output has no splat: the GEP
; takes the scalar base directly with the vector index.
define <vscale x 4 x i32> @splat_base(i32* %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @splat_base(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <vscale x 4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <vscale x 4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
%gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, <vscale x 4 x i64> %index
%res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %res
}
; A single GEP with a scalar struct base, an all-zero vector index and field
; index 1. The expected output splits it into a scalar GEP to field 1 followed
; by a GEP with a zeroinitializer vector index.
define <vscale x 4 x i32> @splat_struct(%struct.a* %base, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @splat_struct(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
;
%gep = getelementptr %struct.a, %struct.a* %base, <vscale x 4 x i64> zeroinitializer, i32 1
%res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %res
}
; Splatted base pointer indexed by a scalar %index. The expected output does the
; indexing with a scalar GEP and then applies a zeroinitializer vector index.
define <vscale x 4 x i32> @scalar_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @scalar_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <vscale x 4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
%gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, i64 %index
%res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %res
}
; Scalar base pointer indexed by a splat of the scalar %index. The expected
; output matches @scalar_index: a scalar GEP by %index followed by a GEP with a
; zeroinitializer vector index.
define <vscale x 4 x i32> @splat_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <vscale x 4 x i64> poison, i64 %index, i32 0
%broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
%gep = getelementptr i32, i32* %base, <vscale x 4 x i64> %broadcast.splat
%res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %res
}
; Vector-indexed GEP into the global array @glob_array. The expected output uses
; a constant GEP to element 0 of the array as a scalar base and applies the
; vector index %indxs to it.
define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @test_global_array(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <vscale x 4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[G]]
;
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <vscale x 4 x i64> %indxs
%g = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %g
}
; Splat of the global @c followed by a GEP to field 1. The expected output passes
; a single constant expression (a shufflevector of an insertelement of the
; constant field address) straight to the gather. Note that the expected
; constant expression uses undef as the insertelement placeholder even though
; the input instruction uses poison.
define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: @global_struct_splat(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> shufflevector (<vscale x 4 x i32*> insertelement (<vscale x 4 x i32*> undef, i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32 0), <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = insertelement <vscale x 4 x %struct.a*> poison, %struct.a* @c, i32 0
%2 = shufflevector <vscale x 4 x %struct.a*> %1, <vscale x 4 x %struct.a*> undef, <vscale x 4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <vscale x 4 x %struct.a*> %2, <vscale x 4 x i64> zeroinitializer, i32 1
%4 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %3, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
ret <vscale x 4 x i32> %4
}
; A splatted pointer is passed directly to the gather, with a real passthru
; operand. The expected output replaces the splat with a GEP of the scalar
; pointer using a zeroinitializer vector index.
define <vscale x 4 x i32> @splat_ptr_gather(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
; CHECK-LABEL: @splat_ptr_gather(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = insertelement <vscale x 4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
%3 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru)
ret <vscale x 4 x i32> %3
}
; Scatter counterpart of @splat_ptr_gather: a splatted pointer is the scatter
; destination. The expected output replaces the splat with a GEP of the scalar
; pointer using a zeroinitializer vector index.
define void @splat_ptr_scatter(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
; CHECK-LABEL: @splat_ptr_scatter(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
; CHECK-NEXT: ret void
;
%1 = insertelement <vscale x 4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask)
ret void
}
; Masked gather/scatter intrinsics called by the tests above.
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
; Attribute group #0 (attached to every test function above) enables SVE.
attributes #0 = { "target-features"="+sve" }

View File

@ -0,0 +1,76 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -codegenprepare %s | FileCheck %s
; Make sure BypassSlowDivision doesn't drop debug info
; A single 64-bit sdiv carrying !dbg !6. The expected output tests whether the
; upper 32 bits of (a | b) are zero, takes a 32-bit udiv path if so and the
; original 64-bit sdiv otherwise, and merges with a phi. Every instruction in
; that expansion is expected to carry the same debug location as the original
; division.
define i64 @sdiv64(i64 %a, i64 %b) {
; CHECK-LABEL: @sdiv64(
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], [[DBG6:!dbg !.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, [[DBG6]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, [[DBG6]]
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]], [[DBG6]]
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[B]] to i32, [[DBG6]]
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[A]] to i32, [[DBG6]]
; CHECK-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], [[DBG6]]
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64, [[DBG6]]
; CHECK-NEXT: br label [[TMP11:%.*]], [[DBG6]]
; CHECK: 9:
; CHECK-NEXT: [[TMP10:%.*]] = sdiv i64 [[A]], [[B]], [[DBG6]]
; CHECK-NEXT: br label [[TMP11]], [[DBG6]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ], [[DBG6]]
; CHECK-NEXT: ret i64 [[TMP12]]
;
%d = sdiv i64 %a, %b, !dbg !6
ret i64 %d
}
; FIXME: The debug locations for the remainder parts end up being the debug
; location of the division.
; The input attaches !dbg !6 to the sdiv and !dbg !10 to the srem; the expected
; output below records the current behaviour, where the urem/srem and the
; remainder phi all carry the division's location.
define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; CHECK-LABEL: @sdivrem64(
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], [[DBG6]]
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, [[DBG6]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, [[DBG6]]
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP11:%.*]], [[DBG6]]
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[B]] to i32, [[DBG6]]
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[A]] to i32, [[DBG6]]
; CHECK-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], [[DBG6]]
; CHECK-NEXT: [[TMP8:%.*]] = urem i32 [[TMP6]], [[TMP5]], [[DBG6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64, [[DBG6]]
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64, [[DBG6]]
; CHECK-NEXT: br label [[TMP14:%.*]], [[DBG6]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = sdiv i64 [[A]], [[B]], [[DBG6]]
; CHECK-NEXT: [[TMP13:%.*]] = srem i64 [[A]], [[B]], [[DBG6]]
; CHECK-NEXT: br label [[TMP14]], [[DBG6]]
; CHECK: 14:
; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[TMP9]], [[TMP4]] ], [ [[TMP12]], [[TMP11]] ], [[DBG6]]
; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP10]], [[TMP4]] ], [ [[TMP13]], [[TMP11]] ], [[DBG6]]
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP15]], i32 0
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i64> [[INS0]], i64 [[TMP16]], i32 1
; CHECK-NEXT: ret <2 x i64> [[INS1]]
;
%d = sdiv i64 %a, %b, !dbg !6
%r = srem i64 %a, %b, !dbg !10
%ins0 = insertelement <2 x i64> poison, i64 %d, i32 0
%ins1 = insertelement <2 x i64> %ins0, i64 %r, i32 1
ret <2 x i64> %ins1
}
; Debug-info metadata shared by both functions above.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "basic.c", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5 "}
; !6 (line 3) is the location attached to the sdiv instructions.
!6 = !DILocation(line: 3, scope: !7)
!7 = distinct !DILexicalBlock(scope: !8, file: !1, line: 3)
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!9 = !DISubroutineType(types: !2)
; !10 (line 4) is the location attached to the srem in @sdivrem64.
!10 = !DILocation(line: 4, scope: !7)

View File

@ -0,0 +1,219 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s
; The splat of %x is built in vector.ph and used only by the mul in the loop
; body. The checks require the insertelement/shufflevector pair to be absent
; from vector.ph and present in vector.body. The negative insertelement check
; uses the poison placeholder so that it matches the form of the input
; instruction; with an undef placeholder it could never fire.
define void @sink_add_mul(i32* %s1, i32 %x, i32* %d, i32 %n) {
; CHECK-LABEL: @sink_add_mul(
; CHECK: vector.ph:
; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%n.vec = and i32 %n, -4
%broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, i32* %s1, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
%2 = mul nsw <4 x i32> %wide.load, %broadcast.splat9
%3 = getelementptr inbounds i32, i32* %d, i32 %index
%4 = bitcast i32* %3 to <4 x i32>*
%wide.load10 = load <4 x i32>, <4 x i32>* %4, align 4
%5 = add nsw <4 x i32> %wide.load10, %2
%6 = bitcast i32* %3 to <4 x i32>*
store <4 x i32> %5, <4 x i32>* %6, align 4
%index.next = add i32 %index, 4
%7 = icmp eq i32 %index.next, %n.vec
br i1 %7, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
; The splat of %x is built in vector.ph and used by two muls in the loop body.
; The checks require the splat to be absent from vector.ph and a separate
; insertelement/shufflevector pair to feed each mul inside vector.body. The
; negative insertelement check uses the poison placeholder so that it matches
; the form of the input instruction; with an undef placeholder it could never
; fire.
define void @sink_add_mul_multiple(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2, i32 %n) {
; CHECK-LABEL: @sink_add_mul_multiple(
; CHECK: vector.ph:
; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: mul nsw <4 x i32> %wide.load, [[TMP3]]
; CHECK: [[TMP2b:%.*]] = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK: [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: mul nsw <4 x i32> %wide.load18, [[TMP3b]]
;
entry:
%cmp13 = icmp sgt i32 %n, 0
br i1 %cmp13, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%n.vec = and i32 %n, -4
%broadcast.splatinsert15 = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, i32* %s1, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
%2 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
%3 = getelementptr inbounds i32, i32* %d, i32 %index
%4 = bitcast i32* %3 to <4 x i32>*
%wide.load17 = load <4 x i32>, <4 x i32>* %4, align 4
%5 = add nsw <4 x i32> %wide.load17, %2
%6 = bitcast i32* %3 to <4 x i32>*
store <4 x i32> %5, <4 x i32>* %6, align 4
%7 = getelementptr inbounds i32, i32* %s2, i32 %index
%8 = bitcast i32* %7 to <4 x i32>*
%wide.load18 = load <4 x i32>, <4 x i32>* %8, align 4
%9 = mul nsw <4 x i32> %wide.load18, %broadcast.splat16
%10 = getelementptr inbounds i32, i32* %d2, i32 %index
%11 = bitcast i32* %10 to <4 x i32>*
%wide.load19 = load <4 x i32>, <4 x i32>* %11, align 4
%12 = add nsw <4 x i32> %wide.load19, %9
%13 = bitcast i32* %10 to <4 x i32>*
store <4 x i32> %12, <4 x i32>* %13, align 4
%index.next = add i32 %index, 4
%14 = icmp eq i32 %index.next, %n.vec
br i1 %14, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
; The splat of %x is used by a mul and also as the first operand of a sub in
; the loop body. The checks expect the insertelement/shufflevector pair to stay
; in vector.ph, immediately before the branch to the loop.
define void @sink_add_sub_unsinkable(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2, i32 %n) {
; CHECK-LABEL: @sink_add_sub_unsinkable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP13]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
;
entry:
%cmp13 = icmp sgt i32 %n, 0
br i1 %cmp13, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%n.vec = and i32 %n, -4
%broadcast.splatinsert15 = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, i32* %s1, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
%2 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
%3 = getelementptr inbounds i32, i32* %d, i32 %index
%4 = bitcast i32* %3 to <4 x i32>*
%wide.load17 = load <4 x i32>, <4 x i32>* %4, align 4
%5 = add nsw <4 x i32> %wide.load17, %2
%6 = bitcast i32* %3 to <4 x i32>*
store <4 x i32> %5, <4 x i32>* %6, align 4
%7 = getelementptr inbounds i32, i32* %s2, i32 %index
%8 = bitcast i32* %7 to <4 x i32>*
%wide.load18 = load <4 x i32>, <4 x i32>* %8, align 4
%9 = sub nsw <4 x i32> %broadcast.splat16, %wide.load18
%10 = getelementptr inbounds i32, i32* %d2, i32 %index
%11 = bitcast i32* %10 to <4 x i32>*
%wide.load19 = load <4 x i32>, <4 x i32>* %11, align 4
%12 = add nsw <4 x i32> %wide.load19, %9
%13 = bitcast i32* %10 to <4 x i32>*
store <4 x i32> %12, <4 x i32>* %13, align 4
%index.next = add i32 %index, 4
%14 = icmp eq i32 %index.next, %n.vec
br i1 %14, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
; The splat of %x is the second operand of the sub in the loop body. The checks
; require the insertelement/shufflevector pair to be absent from vector.ph and
; present in vector.body.
define void @sink_sub(i32* %s1, i32 %x, i32* %d, i32 %n) {
; CHECK-LABEL: @sink_sub(
; CHECK: vector.ph:
; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%n.vec = and i32 %n, -4
%broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, i32* %s1, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
%2 = sub nsw <4 x i32> %wide.load, %broadcast.splat9
%3 = getelementptr inbounds i32, i32* %d, i32 %index
%4 = bitcast i32* %3 to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %4, align 4
%index.next = add i32 %index, 4
%5 = icmp eq i32 %index.next, %n.vec
br i1 %5, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
; The splat of %x is the first operand of the sub in the loop body. The checks
; expect the insertelement/shufflevector pair to stay in vector.ph and no copy
; of it to appear in vector.body. The pattern for %n is captured locally so the
; checks of this function do not depend on a capture made by the checks of an
; earlier function.
define void @sink_sub_unsinkable(i32* %s1, i32 %x, i32* %d, i32 %n) {
entry:
; CHECK-LABEL: @sink_sub_unsinkable(
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NOT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
;
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%n.vec = and i32 %n, -4
%broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, i32* %s1, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
%2 = sub nsw <4 x i32> %broadcast.splat9, %wide.load
%3 = getelementptr inbounds i32, i32* %d, i32 %index
%4 = bitcast i32* %3 to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %4, align 4
%index.next = add i32 %index, 4
%5 = icmp eq i32 %index.next, %n.vec
br i1 %5, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}

View File

@ -0,0 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s
; Sink the shufflevector/insertelement pair, followed by the trunc. The original instructions end up dead once their copies are sunk.
define signext i8 @dead(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @dead(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X:%.*]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret i8 0
;
; Input IR: the splat of trunc(%x) is built once in entry and its only user is
; the mul in vector.body. The patterns above expect entry to keep just the
; `and` and the branch, with the trunc/insertelement/shufflevector chain
; recreated inside vector.body.
entry:
%n.vec = and i32 %n, -8
%l0 = trunc i16 %x to i8
%l1 = insertelement <8 x i8> poison, i8 %l0, i32 0
%broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
; Sole use of the splat.
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
exit: ; preds = %vector.body
ret i8 0
}
; Same as above, but the shuffle has an extra use, meaning it shouldn't be deleted
define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @alive(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
; CHECK-NEXT: [[L0:%.*]] = trunc i16 [[X:%.*]] to i8
; CHECK-NEXT: [[L1:%.*]] = insertelement <8 x i8> poison, i8 [[L0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT26:%.*]] = shufflevector <8 x i8> [[L1]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L2:%.*]] = sub <8 x i8> zeroinitializer, [[BROADCAST_SPLAT26]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret i8 0
;
; Input IR: in addition to the mul in vector.body, the splat is also used by
; %l2 in entry. The patterns above expect the original chain (and %l2) to stay
; in entry while vector.body receives its own copy of the
; trunc/insertelement/shufflevector chain.
entry:
%n.vec = and i32 %n, -8
%l0 = trunc i16 %x to i8
%l1 = insertelement <8 x i8> poison, i8 %l0, i32 0
%broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer
; Extra use of the splat outside the loop; %l2 itself has no users.
%l2 = sub <8 x i8> zeroinitializer, %broadcast.splat26
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
exit: ; preds = %vector.body
ret i8 0
}

View File

@ -0,0 +1,113 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; Two-field struct; the struct tests below address field 1.
%struct.a = type { i32, i32 }
; External instance of %struct.a, splatted in @global_struct_splat.
@c = external dso_local global %struct.a, align 4
; Constant table of 16 i32 values, indexed with a vector in @test_global_array.
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
; The base pointer is splatted into a vector and indexed by a vector of
; offsets. The patterns expect the gather's pointer operand to come from a
; single GEP that uses the scalar %base with the vector index, with no
; insertelement/shufflevector left before the gather.
define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) {
; CHECK-LABEL: @splat_base(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; A GEP with a scalar struct base, an all-zero vector as the first index and
; field 1 as the second. The patterns expect a scalar GEP to field 1 followed
; by a GEP with all-zero vector indices that produces the pointer vector for
; the gather.
define <4 x i32> @splat_struct(%struct.a* %base) {
; CHECK-LABEL: @splat_struct(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; Splatted base pointer indexed by a scalar offset. The patterns expect the
; address arithmetic to be done by a scalar GEP (%base + %index), followed by
; a GEP with all-zero vector indices that produces the pointer vector.
define <4 x i32> @scalar_index(i32* %base, i64 %index) {
; CHECK-LABEL: @scalar_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; Scalar base pointer indexed by a splatted scalar offset. The expected output
; has the same shape as for @scalar_index: a scalar GEP (%base + %index)
; followed by a GEP with all-zero vector indices.
define <4 x i32> @splat_index(i32* %base, i64 %index) {
; CHECK-LABEL: @splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; Vector index into the constant global @glob_array. The patterns expect the
; base to become a constant-expression GEP to element 0 of the array, which is
; then indexed by the vector %indxs.
define <4 x i32> @test_global_array(<4 x i64> %indxs) {
; CHECK-LABEL: @test_global_array(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[G]]
;
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %g
}
; Splat of the global @c followed by a GEP to field 1 with all-zero vector
; indices. The patterns expect the gather's pointer operand to be a constant
; vector holding four copies of the constant GEP to field 1 of @c, with no
; instructions left before the call.
define <4 x i32> @global_struct_splat() {
; CHECK-LABEL: @global_struct_splat(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> <i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = insertelement <4 x %struct.a*> poison, %struct.a* @c, i32 0
%2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1
%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %4
}
; A splatted pointer is passed straight to the gather (no GEP in the input),
; with caller-provided mask and passthru. The patterns expect the pointer
; vector to be produced by a GEP of the scalar %ptr with all-zero vector
; indices instead of the insertelement/shufflevector pair.
define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: @splat_ptr_gather(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
%3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
ret <4 x i32> %3
}
; Scatter counterpart of @splat_ptr_gather: a splatted pointer is the
; destination of a masked scatter. The patterns expect the pointer vector to
; be produced by a GEP of the scalar %ptr with all-zero vector indices.
define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
; CHECK-LABEL: @splat_ptr_scatter(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT: ret void
;
%1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
ret void
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)

View File

@ -0,0 +1,321 @@
; RUN: opt -S -codegenprepare < %s | FileCheck %s
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
@x = external global [1 x [2 x <4 x float>]]
; Can we sink single addressing mode computation to use?
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr inbounds i8, {{.+}} 40
entry:
; %addr is %base advanced by 5 i64 elements (5 * 8 bytes = 40, the constant in
; the pattern above). It is computed here but only dereferenced in if.then.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; Only memory access through the address; %v itself is unused.
%v = load i32, i32* %casted, align 4
br label %fallthrough
fallthrough:
ret void
}
declare void @foo(i32)
; Make sure sinking two copies of addressing mode into different blocks works
define void @test2(i1 %cond, i64* %base) {
; CHECK-LABEL: @test2
entry:
; Address = %base + 40 bytes, defined once here and loaded from in two
; different blocks (if.then and next).
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
; First user block: expected to receive its own byte-offset GEP.
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %next, label %fallthrough
next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
; Second user block: expected to receive a separate copy.
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
fallthrough:
ret void
}
; If we have two loads in the same block, only need one copy of addressing mode
; - instruction selection will duplicate if needed
define void @test3(i1 %cond, i64* %base) {
; CHECK-LABEL: @test3
entry:
; Address = %base + 40 bytes; both loads that use it live in if.then.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
; After the first byte-offset GEP has been matched, no second one may appear
; in the rest of this function's output.
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
fallthrough:
ret void
}
; Can we still sink addressing mode if there's a cold use of the
; address itself?
define void @test4(i1 %cond, i64* %base) {
; CHECK-LABEL: @test4
entry:
; Address = %base + 40 bytes. It is loaded from in if.then and also passed as
; a plain pointer argument to a call marked `cold` in rare.1.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
; The pointer escapes into a cold call; the patterns expect this block to get
; its own copy of the address computation as well.
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Negative test - don't want to duplicate addressing into hot path
define void @test5(i1 %cond, i64* %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr inbounds
; Same shape as @test4, but the call in rare.1 is not marked cold. The
; patterns expect %addr to remain defined here in entry.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
; No later label is checked in this function, so the negative pattern above
; covers everything from if.then to the end of the function, rare.1 included.
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
br label %fallthrough
}
; Negative test - opt for size
define void @test6(i1 %cond, i64* %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
; Same body as @test4 (cold call in rare.1), but the function is `minsize`.
; The patterns expect %addr to remain defined here in entry.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
; The negative pattern above extends to the end of this function's output
; (rare.1 included), since no further label is checked before the next function.
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Negative test - opt for size
define void @test6_pgso(i1 %cond, i64* %base) !prof !14 {
; The label names this function in full; the shorter "@test6" would only
; anchor here because of where the function sits in the file.
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
; Same body as @test6, but instead of `minsize` the function carries !prof !14
; (function_entry_count 0) next to the module's ProfileSummary flag --
; presumably exercising profile-guided size optimization; confirm. The
; patterns expect %addr to remain defined here in entry.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
; The negative pattern above extends to the end of this function's output
; (rare.1 included), since no further label is checked before the next function.
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
define void @test7(i1 %cond, i64* %base) {
; CHECK-LABEL: @test7
entry:
; Address = %base + 40 bytes, used by loads in if.then and next and by cold
; calls in rare.1 and rare.2.
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %next
next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
%cmp2 = icmp eq i32 %v2, 0
; NOTE(review): this branch targets %rare.1, and nothing in the function
; branches to %rare.2, so rare.2 has no predecessors. The comment above the
; function ("cold paths for each") suggests %rare.2 may have been intended
; here -- confirm before changing.
br i1 %cmp2, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
call void @slowpath(i32 %v1, i32* %casted) cold
br label %next
rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr inbounds i8, {{.+}} 40
call void @slowpath(i32 %v2, i32* %casted) cold
br label %fallthrough
}
declare void @slowpath(i32, i32*)
; Make sure we don't end up in an infinite loop after we fail to sink.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
; The pattern above expects %ptr to appear in the output exactly as written in
; the input: same name, same operands, including the undef index.
define void @test8() {
allocas:
%aFOO_load = load float*, float** undef
%aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
; This insertelement has no users in the function.
%aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> poison, i64 %aFOO_load_ptr2int, i32 0
%aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
%ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
br label %load.i145
load.i145:
; The only memory access through %ptr, in a different block from its definition.
%ptr.i143 = bitcast i8* %ptr to <4 x float>*
%valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
%x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
br label %pl_loop.i.i122
pl_loop.i.i122:
; Self-loop with no exit.
br label %pl_loop.i.i122
}
; Make sure we can sink address computation even
; if there is a cycle in phi nodes.
define void @test9(i1 %cond, i64* %base) {
; CHECK-LABEL: @test9
entry:
; First copy of the address (%base + 40 bytes).
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br label %header
header:
%iv = phi i32 [0, %entry], [%iv.inc, %backedge]
; %casted.loop and %casted.merged (in backedge) reference each other, forming
; the phi cycle this test is about.
%casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
br i1 %cond, label %if.then, label %backedge
if.then:
call void @foo(i32 %iv)
; Second copy of the same address computation (%base + 40 bytes).
%addr.1 = getelementptr inbounds i64, i64* %base, i64 5
%casted.1 = bitcast i64* %addr.1 to i32*
br label %backedge
backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr inbounds i8, {{.+}} 40
; Every non-cyclic input of the phis is %base + 40 bytes; the pattern expects
; a byte-offset GEP with that constant in this block.
%casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
%v = load i32, i32* %casted.merged, align 4
call void @foo(i32 %v)
%iv.inc = add i32 %iv, 1
%cmp = icmp slt i32 %iv.inc, 1000
br i1 %cmp, label %header, label %exit
exit:
ret void
}
; Make sure we can eliminate a select when both arguments perform equivalent
; address computation.
define void @test10(i1 %cond, i64* %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr inbounds i8, {{.+}} 40
; CHECK-NOT: select
entry:
; Two routes to the same byte offset from %base:
;   %gep1: 5 i64 elements  = 40 bytes
;   %gep2: 10 i32 elements = 40 bytes
%gep1 = getelementptr inbounds i64, i64* %base, i64 5
%gep1.casted = bitcast i64* %gep1 to i32*
%base.casted = bitcast i64* %base to i32*
%gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
; Both select arms denote the same address; the patterns expect no select
; after the byte-offset GEP in the output.
%casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
%v = load i32, i32* %casted.merged, align 4
call void @foo(i32 %v)
ret void
}
; Found by fuzzer, getSExtValue of > 64 bit constant
; No FileCheck patterns are attached to this function; presumably it only has
; to be processed without crashing -- confirm.
define void @i96_mul(i1* %base, i96 %offset) {
BB:
;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
; The multiplier is 2^95 - 1, which is wider than 64 bits, and the product is
; used directly as a GEP index.
%B84 = mul i96 %offset, 39614081257132168796771975167
%G23 = getelementptr i1, i1* %base, i96 %B84
store i1 false, i1* %G23
ret void
}
; Module flag carrying a "ProfileSummary" record (!1).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
; Summary fields: format, total/max counts, counter and function totals, and
; the detailed summary table (!10).
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
; Three detailed-summary entries (!11, !12, !13).
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of 0; attached to @test6_pgso through its !prof annotation.
!14 = !{!"function_entry_count", i64 0}

View File

@ -0,0 +1,406 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
; lshr of %z by a select between two splats (lane 0 of %x and lane 0 of %y).
; Expected output per RUN configuration, as encoded in the patterns below:
;  - +avx only: the shift is split into one lshr per splat and the two results
;    are selected with %cond; the original select of the splats is still there.
;  - +avx2, +avx512bw and the two +xop runs: the IR is left as written.
define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: @vector_variable_shift_right_v4i32(
; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX1-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SPLAT1]]
; AVX1-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Z]], [[SPLAT2]]
; AVX1-NEXT: [[TMP3:%.*]] = select <4 x i1> [[COND]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; AVX1-NEXT: ret <4 x i32> [[TMP3]]
;
; AVX2-LABEL: @vector_variable_shift_right_v4i32(
; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX2-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; AVX2-NEXT: ret <4 x i32> [[SH]]
;
; AVX512BW-LABEL: @vector_variable_shift_right_v4i32(
; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX512BW-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; AVX512BW-NEXT: ret <4 x i32> [[SH]]
;
; XOP-LABEL: @vector_variable_shift_right_v4i32(
; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; XOP-NEXT: ret <4 x i32> [[SH]]
;
%splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
%splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer
%sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2
%sh = lshr <4 x i32> %z, %sel
ret <4 x i32> %sh
}
; Same pattern as the v4i32 case, on <16 x i16>.
; Expected output per RUN configuration, as encoded in the patterns below:
;  - +avx only and +avx2: the shift is split into one lshr per splat and the
;    two results are selected with %cond.
;  - +avx512bw and the two +xop runs: the IR is left as written.
define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: @vector_variable_shift_right_v16i16(
; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX1-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]]
; AVX1-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]]
; AVX1-NEXT: [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]]
; AVX1-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX2-LABEL: @vector_variable_shift_right_v16i16(
; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX2-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]]
; AVX2-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]]
; AVX2-NEXT: [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]]
; AVX2-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX512BW-LABEL: @vector_variable_shift_right_v16i16(
; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX512BW-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
; AVX512BW-NEXT: ret <16 x i16> [[SH]]
;
; XOP-LABEL: @vector_variable_shift_right_v16i16(
; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; XOP-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
; XOP-NEXT: ret <16 x i16> [[SH]]
;
%splat1 = shufflevector <16 x i16> %x, <16 x i16> undef, <16 x i32> zeroinitializer
%splat2 = shufflevector <16 x i16> %y, <16 x i16> undef, <16 x i32> zeroinitializer
%sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2
%sh = lshr <16 x i16> %z, %sel
ret <16 x i16> %sh
}
; Same pattern as the v4i32 case, on <32 x i8>. A single set of patterns is
; shared by the five RUN lines that use the common prefix, and it expects the
; IR to be left as written: the lshr still takes the select as its shift amount.
define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; ALL-LABEL: @vector_variable_shift_right_v32i8(
; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
; ALL-NEXT: ret <32 x i8> [[SH]]
;
%splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer
%splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer
%sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2
%sh = lshr <32 x i8> %z, %sel
ret <32 x i8> %sh
}
; PR37428 - https://bugs.llvm.org/show_bug.cgi?id=37428
define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture readonly %control, i32 %count, i32 %amt0, i32 %amt1, i32 %x) {
; AVX1-LABEL: @vector_variable_shift_left_loop(
; AVX1-NEXT: entry:
; AVX1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX1-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX1: vector.ph:
; AVX1-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX1-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0
; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0
; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; AVX1-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX1: vector.body:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX1-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX1-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP4]]
; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP6]]
; AVX1-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP7]]
; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX1-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; AVX1-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP10]], align 4
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[TMP11]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX1: exit:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @vector_variable_shift_left_loop(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX2-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX2: vector.ph:
; AVX2-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX2-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0
; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0
; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; AVX2-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX2-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX2-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX2-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; AVX2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP6]], align 4
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX2: exit:
; AVX2-NEXT: ret void
;
; AVX512BW-LABEL: @vector_variable_shift_left_loop(
; AVX512BW-NEXT: entry:
; AVX512BW-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX512BW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX512BW-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX512BW: vector.ph:
; AVX512BW-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX512BW-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0
; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0
; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; AVX512BW-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512BW: vector.body:
; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512BW-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; AVX512BW-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX512BW-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX512BW-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX512BW-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; AVX512BW-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP6]], align 4
; AVX512BW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX512BW-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX512BW: exit:
; AVX512BW-NEXT: ret void
;
; XOP-LABEL: @vector_variable_shift_left_loop(
; XOP-NEXT: entry:
; XOP-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; XOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; XOP-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; XOP: vector.ph:
; XOP-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; XOP-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0
; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0
; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
; XOP-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: br label [[VECTOR_BODY:%.*]]
; XOP: vector.body:
; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; XOP-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; XOP-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; XOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; XOP-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; XOP-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; XOP-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; XOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; XOP-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; XOP-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP6]], align 4
; XOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; XOP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; XOP-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; XOP: exit:
; XOP-NEXT: ret void
;
entry:
%cmp16 = icmp sgt i32 %count, 0
%wide.trip.count = zext i32 %count to i64
br i1 %cmp16, label %vector.ph, label %exit
vector.ph:
%n.vec = and i64 %wide.trip.count, 4294967292
%splatinsert18 = insertelement <4 x i32> poison, i32 %amt0, i32 0
%splat1 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
%splatinsert20 = insertelement <4 x i32> poison, i32 %amt1, i32 0
%splat2 = shufflevector <4 x i32> %splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer
%splatinsert22 = insertelement <4 x i32> poison, i32 %x, i32 0
%splat3 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer
br label %vector.body
vector.body:
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i8, i8* %control, i64 %index
%1 = bitcast i8* %0 to <4 x i8>*
%wide.load = load <4 x i8>, <4 x i8>* %1, align 1
%2 = icmp eq <4 x i8> %wide.load, zeroinitializer
%3 = select <4 x i1> %2, <4 x i32> %splat1, <4 x i32> %splat2
%4 = shl <4 x i32> %splat3, %3
%5 = getelementptr inbounds i32, i32* %arr, i64 %index
%6 = bitcast i32* %5 to <4 x i32>*
store <4 x i32> %4, <4 x i32>* %6, align 4
%index.next = add i64 %index, 4
%7 = icmp eq i64 %index.next, %n.vec
br i1 %7, label %exit, label %vector.body
exit:
ret void
}
; PR37426 - https://bugs.llvm.org/show_bug.cgi?id=37426
; If we don't have real vector shift instructions (AVX1), convert the funnel
; shift into 2 funnel shifts and sink the splat shuffles into the loop.
;
; Input: %rot0 and %rot1 are splatted in the entry block; inside the loop a
; per-lane select (driven by the control bytes) picks one of the two splats as
; the amount of a single fshl whose two value operands are the same vector.
;
; Expected output, as pinned by the check lines below:
;  * AVX1 prefix - the loop contains two fshl calls, each fed by a splat
;    shuffle of [[I0]] / [[I1]] created inside the loop, and a select on
;    [[T2]] between the two results. The entry-block splats and the [[SHAMT]]
;    select are still present.
;  * AVX2, AVX512BW and XOP prefixes - the loop keeps the single fshl on
;    [[SHAMT]].
define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) {
; AVX1-LABEL: @fancierRotate2(
; AVX1-NEXT: entry:
; AVX1-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0
; AVX1-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0
; AVX1-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: br label [[LOOP:%.*]]
; AVX1: loop:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX1-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX1-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>*
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1
; AVX1-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX1-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX1-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX1-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>*
; AVX1-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4
; AVX1-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP0]])
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP2]])
; AVX1-NEXT: [[TMP4:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[TMP1]], <8 x i32> [[TMP3]]
; AVX1-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* [[T5]], align 4
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX1-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX1-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX1: exit:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @fancierRotate2(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0
; AVX2-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0
; AVX2-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: br label [[LOOP:%.*]]
; AVX2: loop:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX2-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX2-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1
; AVX2-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX2-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX2-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX2-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>*
; AVX2-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4
; AVX2-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; AVX2-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX2-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX2-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX2: exit:
; AVX2-NEXT: ret void
;
; AVX512BW-LABEL: @fancierRotate2(
; AVX512BW-NEXT: entry:
; AVX512BW-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0
; AVX512BW-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0
; AVX512BW-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: br label [[LOOP:%.*]]
; AVX512BW: loop:
; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX512BW-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>*
; AVX512BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1
; AVX512BW-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX512BW-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX512BW-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>*
; AVX512BW-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4
; AVX512BW-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; AVX512BW-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4
; AVX512BW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX512BW-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX512BW-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX512BW: exit:
; AVX512BW-NEXT: ret void
;
; XOP-LABEL: @fancierRotate2(
; XOP-NEXT: entry:
; XOP-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0
; XOP-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; XOP-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0
; XOP-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; XOP-NEXT: br label [[LOOP:%.*]]
; XOP: loop:
; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; XOP-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]]
; XOP-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>*
; XOP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1
; XOP-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; XOP-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; XOP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; XOP-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>*
; XOP-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4
; XOP-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; XOP-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4
; XOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; XOP-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; XOP-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; XOP: exit:
; XOP-NEXT: ret void
;
entry:
; Loop-invariant splats of the two candidate rotate amounts.
%i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0
%s0 = shufflevector <8 x i32> %i0, <8 x i32> undef, <8 x i32> zeroinitializer
%i1 = insertelement <8 x i32> poison, i32 %rot1, i32 0
%s1 = shufflevector <8 x i32> %i1, <8 x i32> undef, <8 x i32> zeroinitializer
br label %loop
loop:
; Each iteration handles 8 lanes; the loop exits once %index.next equals 1024.
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
%t0 = getelementptr inbounds i8, i8* %control, i64 %index
%t1 = bitcast i8* %t0 to <8 x i8>*
%wide.load = load <8 x i8>, <8 x i8>* %t1, align 1
; Lanes whose control byte is zero take the %rot0 splat, all others the %rot1 splat.
%t2 = icmp eq <8 x i8> %wide.load, zeroinitializer
%shamt = select <8 x i1> %t2, <8 x i32> %s0, <8 x i32> %s1
%t4 = getelementptr inbounds i32, i32* %arr, i64 %index
%t5 = bitcast i32* %t4 to <8 x i32>*
%wide.load21 = load <8 x i32>, <8 x i32>* %t5, align 4
; Both value operands are %wide.load21, so this fshl rotates each lane left by %shamt.
%rot = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %wide.load21, <8 x i32> %wide.load21, <8 x i32> %shamt)
; The result is written back through the same pointer the input was loaded from.
store <8 x i32> %rot, <8 x i32>* %t5, align 4
%index.next = add i64 %index, 8
%t7 = icmp eq i64 %index.next, 1024
br i1 %t7, label %exit, label %loop
exit:
ret void
}
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) #1
; Check that every instruction inserted by -codegenprepare has a debug location.
; DEBUG: CheckModuleDebugify: PASS

View File

@ -0,0 +1,257 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
; RUN: opt -S -codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP
; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin10.9.0"
; <16 x i8> shl whose amount is a splat of lane 0 of %tmp, with the splat
; defined in the entry block and the shift in %if_false. All four RUN lines
; share the common prefix here, and it expects the IR to be left as written:
; the shl keeps using the entry-block splat.
define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
; CHECK-LABEL: @test_8bit(
; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if_true:
; CHECK-NEXT: ret <16 x i8> [[MASK]]
; CHECK: if_false:
; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
; CHECK-NEXT: ret <16 x i8> [[RES]]
;
; An all-zero shuffle mask broadcasts lane 0 of %tmp to every lane.
%mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
; Second use of the splat, in a different block from the shift.
ret <16 x i8> %mask
if_false:
%res = shl <16 x i8> %lhs, %mask
ret <16 x i8> %res
}
; Same shape as @test_8bit but with <8 x i16> elements. Expected results differ
; per CPU:
;  * corei7 (SSE2 prefix) and core-avx2 (AVX2 prefix) - a copy of the splat
;    shuffle is created in %if_false directly in front of the shl, which then
;    uses that copy. The entry-block splat stays because %if_true returns it.
;  * bdver2 (XOP prefix) and skylake-avx512 (AVX512BW prefix) - the IR is
;    unchanged and the shl keeps using the entry-block splat.
define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
; CHECK-SSE2-LABEL: @test_16bit(
; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-SSE2: if_true:
; CHECK-SSE2-NEXT: ret <8 x i16> [[MASK]]
; CHECK-SSE2: if_false:
; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
; CHECK-SSE2-NEXT: ret <8 x i16> [[RES]]
;
; CHECK-XOP-LABEL: @test_16bit(
; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-XOP: if_true:
; CHECK-XOP-NEXT: ret <8 x i16> [[MASK]]
; CHECK-XOP: if_false:
; CHECK-XOP-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
; CHECK-XOP-NEXT: ret <8 x i16> [[RES]]
;
; CHECK-AVX2-LABEL: @test_16bit(
; CHECK-AVX2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-AVX2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-AVX2: if_true:
; CHECK-AVX2-NEXT: ret <8 x i16> [[MASK]]
; CHECK-AVX2: if_false:
; CHECK-AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-AVX2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
; CHECK-AVX2-NEXT: ret <8 x i16> [[RES]]
;
; CHECK-AVX512BW-LABEL: @test_16bit(
; CHECK-AVX512BW-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-AVX512BW-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-AVX512BW: if_true:
; CHECK-AVX512BW-NEXT: ret <8 x i16> [[MASK]]
; CHECK-AVX512BW: if_false:
; CHECK-AVX512BW-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
; CHECK-AVX512BW-NEXT: ret <8 x i16> [[RES]]
;
; Broadcast of lane 0 of %tmp, used both as a return value and as a shift amount.
%mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
ret <8 x i16> %mask
if_false:
%res = shl <8 x i16> %lhs, %mask
ret <8 x i16> %res
}
; Negative case: the shuffle mask <0, 1, 1, 0> picks two different source
; lanes, so the shift amount is not a splat. The common prefix expects the IR
; to stay as written on every tested CPU.
define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
; CHECK-LABEL: @test_notsplat(
; CHECK-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if_true:
; CHECK-NEXT: ret <4 x i32> [[MASK]]
; CHECK: if_false:
; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[LHS:%.*]], [[MASK]]
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
; Lanes 0 and 1 of %tmp are both selected, so the result is not uniform.
%mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
br i1 %tst, label %if_true, label %if_false
if_true:
ret <4 x i32> %mask
if_false:
%res = shl <4 x i32> %lhs, %mask
ret <4 x i32> %res
}
; <4 x i32> ashr by a splat of lane 0 of %tmp whose shuffle mask has one undef
; lane (<0, undef, 0, 0>). Expected results:
;  * corei7 (SSE2 prefix) - a copy of the shuffle, with the same mask, is
;    created in %if_false and feeds the ashr. The entry-block shuffle remains
;    for the return in %if_true.
;  * bdver2 (XOP prefix), core-avx2 and skylake-avx512 (both covered by the
;    shared AVX prefix) - the IR is unchanged.
define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
; CHECK-SSE2-LABEL: @test_32bit(
; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-SSE2: if_true:
; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]]
; CHECK-SSE2: if_false:
; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]]
; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]]
;
; CHECK-XOP-LABEL: @test_32bit(
; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-XOP: if_true:
; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]]
; CHECK-XOP: if_false:
; CHECK-XOP-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
; CHECK-XOP-NEXT: ret <4 x i32> [[RES]]
;
; CHECK-AVX-LABEL: @test_32bit(
; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-AVX: if_true:
; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]]
; CHECK-AVX: if_false:
; CHECK-AVX-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
; CHECK-AVX-NEXT: ret <4 x i32> [[RES]]
;
; Every defined mask lane selects lane 0 of %tmp; lane 1 of the mask is undef.
%mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
br i1 %tst, label %if_true, label %if_false
if_true:
ret <4 x i32> %mask
if_false:
%res = ashr <4 x i32> %lhs, %mask
ret <4 x i32> %res
}
; <2 x i64> lshr by a splat of lane 0 of %tmp defined in the entry block.
; Expected results:
;  * corei7 (SSE2 prefix) - a copy of the splat shuffle is created in %if_false
;    and feeds the lshr. The entry-block splat remains for %if_true.
;  * bdver2 (XOP prefix), core-avx2 and skylake-avx512 (shared AVX prefix) -
;    the IR is unchanged.
define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
; CHECK-SSE2-LABEL: @test_64bit(
; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-SSE2: if_true:
; CHECK-SSE2-NEXT: ret <2 x i64> [[MASK]]
; CHECK-SSE2: if_false:
; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-SSE2-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[TMP1]]
; CHECK-SSE2-NEXT: ret <2 x i64> [[RES]]
;
; CHECK-XOP-LABEL: @test_64bit(
; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-XOP: if_true:
; CHECK-XOP-NEXT: ret <2 x i64> [[MASK]]
; CHECK-XOP: if_false:
; CHECK-XOP-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
; CHECK-XOP-NEXT: ret <2 x i64> [[RES]]
;
; CHECK-AVX-LABEL: @test_64bit(
; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK-AVX: if_true:
; CHECK-AVX-NEXT: ret <2 x i64> [[MASK]]
; CHECK-AVX: if_false:
; CHECK-AVX-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
; CHECK-AVX-NEXT: ret <2 x i64> [[RES]]
;
; Broadcast of lane 0 of %tmp, used both as a return value and as a shift amount.
%mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
if_true:
ret <2 x i64> %mask
if_false:
%res = lshr <2 x i64> %lhs, %mask
ret <2 x i64> %res
}
; Loop that rotates 8 x i32 chunks of %arr in place by a splat of %rot. The
; rotate is an fshl whose two value operands are the same loaded vector, and
; the splat is built in the entry block. Expected results:
;  * corei7 (SSE2 prefix) - the entry block keeps only the insertelement; the
;    splat shuffle appears inside vector.body right before the fshl call.
;  * bdver2 (XOP prefix), core-avx2 and skylake-avx512 (shared AVX prefix) -
;    the splat shuffle stays in the entry block and the loop is unchanged.
define void @funnel_splatvar(i32* nocapture %arr, i32 %rot) {
; CHECK-SSE2-LABEL: @funnel_splatvar(
; CHECK-SSE2-NEXT: entry:
; CHECK-SSE2-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[ROT:%.*]], i32 0
; CHECK-SSE2-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-SSE2: vector.body:
; CHECK-SSE2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-SSE2-NEXT: [[T0:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; CHECK-SSE2-NEXT: [[T1:%.*]] = bitcast i32* [[T0]] to <8 x i32>*
; CHECK-SSE2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[T1]], align 4
; CHECK-SSE2-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-SSE2-NEXT: [[T2:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD]], <8 x i32> [[WIDE_LOAD]], <8 x i32> [[TMP0]])
; CHECK-SSE2-NEXT: store <8 x i32> [[T2]], <8 x i32>* [[T1]], align 4
; CHECK-SSE2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-SSE2-NEXT: [[T3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-SSE2-NEXT: br i1 [[T3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK-SSE2: for.cond.cleanup:
; CHECK-SSE2-NEXT: ret void
;
; CHECK-XOP-LABEL: @funnel_splatvar(
; CHECK-XOP-NEXT: entry:
; CHECK-XOP-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[ROT:%.*]], i32 0
; CHECK-XOP-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-XOP-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-XOP: vector.body:
; CHECK-XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-XOP-NEXT: [[T0:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; CHECK-XOP-NEXT: [[T1:%.*]] = bitcast i32* [[T0]] to <8 x i32>*
; CHECK-XOP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[T1]], align 4
; CHECK-XOP-NEXT: [[T2:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD]], <8 x i32> [[WIDE_LOAD]], <8 x i32> [[BROADCAST_SPLAT16]])
; CHECK-XOP-NEXT: store <8 x i32> [[T2]], <8 x i32>* [[T1]], align 4
; CHECK-XOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-XOP-NEXT: [[T3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-XOP-NEXT: br i1 [[T3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK-XOP: for.cond.cleanup:
; CHECK-XOP-NEXT: ret void
;
; CHECK-AVX-LABEL: @funnel_splatvar(
; CHECK-AVX-NEXT: entry:
; CHECK-AVX-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[ROT:%.*]], i32 0
; CHECK-AVX-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-AVX-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-AVX: vector.body:
; CHECK-AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-AVX-NEXT: [[T0:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]]
; CHECK-AVX-NEXT: [[T1:%.*]] = bitcast i32* [[T0]] to <8 x i32>*
; CHECK-AVX-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[T1]], align 4
; CHECK-AVX-NEXT: [[T2:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD]], <8 x i32> [[WIDE_LOAD]], <8 x i32> [[BROADCAST_SPLAT16]])
; CHECK-AVX-NEXT: store <8 x i32> [[T2]], <8 x i32>* [[T1]], align 4
; CHECK-AVX-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-AVX-NEXT: [[T3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-AVX-NEXT: br i1 [[T3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK-AVX: for.cond.cleanup:
; CHECK-AVX-NEXT: ret void
;
entry:
; Loop-invariant splat of the rotate amount; this is the shuffle the SSE2 run moves.
%broadcast.splatinsert15 = insertelement <8 x i32> poison, i32 %rot, i32 0
%broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer
br label %vector.body
vector.body:
; 8 elements per iteration; the loop exits once %index.next equals 65536.
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%t0 = getelementptr inbounds i32, i32* %arr, i64 %index
%t1 = bitcast i32* %t0 to <8 x i32>*
%wide.load = load <8 x i32>, <8 x i32>* %t1, align 4
; Both value operands are %wide.load, so this fshl rotates each lane left by the splat amount.
%t2 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %wide.load, <8 x i32> %wide.load, <8 x i32> %broadcast.splat16)
store <8 x i32> %t2, <8 x i32>* %t1, align 4
%index.next = add i64 %index, 8
%t3 = icmp eq i64 %index.next, 65536
br i1 %t3, label %for.cond.cleanup, label %vector.body
for.cond.cleanup:
ret void
}
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)

View File

@ -0,0 +1,42 @@
; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s
declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
; This test ensures that masked scatter and gather operations, which take vectors of pointers,
; still have their pointer aliasing taken into account when they are processed.
; None of the scatter/gather calls should be eliminated.
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.scatter
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.scatter
; CHECK: llvm.masked.gather
; Gathers one <2 x i32> value through each of %in1 and %in2, then round-trips
; each value through a pair of allocas: a masked scatter to the pointer vector
; %tmp followed by a masked gather from the same %tmp. The lanes of the last
; gather are finally stored to %out and %out + 1. All masks are all-true.
define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) {
entry:
; Just some temporary storage
%tmp.0 = alloca i32
%tmp.1 = alloca i32
; Pack the two alloca addresses into the pointer vector used by the scatters and gathers below
%tmp.i = insertelement <2 x i32*> poison, i32* %tmp.0, i32 0
%tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
; Read from in1 and in2
%in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
%in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in1 to the allocas
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
; Read in1 from the allocas
; This gather should alias the scatter we just saw
; (its result %tmp.v.0 has no later use in this function)
%tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in2 to the allocas
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
; Read in2 from the allocas
; This gather should alias the scatter we just saw, and not be eliminated
%tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in2 to out for good measure
%tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
%tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
store i32 %tmp.v.1.0, i32* %out
%out.1 = getelementptr i32, i32* %out, i32 1
store i32 %tmp.v.1.1, i32* %out.1
ret void
}

View File

@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -gvn -S %s | FileCheck %s
; Reduced test case from
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=24278
; Make sure we do not crash when dealing with a vector constant expression.
;
; Everything except the load of %L3 and the final insertelement operates on
; constants only: -32768 / 256 = -128, 0 - (-128) = 128 is placed in lane 3 of
; an otherwise poison vector, and negating that vector yields -128 in lane 3.
; The expected output has this chain collapsed into a constant getelementptr
; expression with indices <poison, poison, poison, -128>.
define <4 x i64*> @test(i64* %ptr) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L3:%.*]] = load i64, i64* [[PTR:%.*]], align 4
; CHECK-NEXT: [[I6:%.*]] = insertelement <4 x i64*> getelementptr (i64, i64* null, <4 x i64> <i64 poison, i64 poison, i64 poison, i64 -128>), i64* undef, i64 [[L3]]
; CHECK-NEXT: ret <4 x i64*> [[I6]]
;
entry:
%B9 = sdiv i16 -32768, 256
; The only non-constant value in the function; used below as the insertion index.
%L3 = load i64, i64* %ptr, align 4
%B3 = sub i16 0, %B9
%0 = insertelement <4 x i16> poison, i16 %B3, i32 3
%1 = sub <4 x i16> zeroinitializer, %0
%2 = sext <4 x i16> %1 to <4 x i32>
; Vector-indexed GEP off a null base: this becomes the vector constant expression.
%3 = getelementptr inbounds i64, i64* null, <4 x i32> %2
; Insertion at a run-time index (%L3) into the constant pointer vector.
%I6 = insertelement <4 x i64*> %3, i64* undef, i64 %L3
ret <4 x i64*> %I6
}

View File

@ -0,0 +1,456 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -gvn -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
target triple = "x86_64-unknown-linux-gnu"
; Stores an i64 to %loc and, on the %neverTaken path, reloads the same address
; as an i8 addrspace(4)* (address space 4 is listed under "ni:" in this file's
; datalayout) before storing a byte through it. The expected output still
; contains the pointer load, i.e. the stored integer is not forwarded into it.
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
; CHECK-LABEL: @f0(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 [[VAL:%.*]], i64* [[LOC:%.*]], align 8
; CHECK-NEXT: br i1 [[ALWAYSFALSE:%.*]], label [[NEVERTAKEN:%.*]], label [[ALWAYSTAKEN:%.*]]
; CHECK: neverTaken:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64* [[LOC]] to i8 addrspace(4)**
; CHECK-NEXT: [[PTR:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)** [[LOC_BC]], align 8
; CHECK-NEXT: store i8 5, i8 addrspace(4)* [[PTR]], align 1
; CHECK-NEXT: ret void
; CHECK: alwaysTaken:
; CHECK-NEXT: ret void
;
entry:
store i64 %val, i64* %loc
br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
neverTaken:
%loc.bc = bitcast i64* %loc to i8 addrspace(4)**
%ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
store i8 5, i8 addrspace(4)* %ptr
ret void
alwaysTaken:
ret void
}
; Mirror image of @f0: an i8 addrspace(4)* is stored to %loc and, on the
; %neverTaken path, the same address is reloaded as an i64. The expected
; output keeps the i64 load, i.e. the stored pointer is not forwarded as an
; integer.
define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8 addrspace(4)* [[VAL:%.*]], i8 addrspace(4)** [[LOC:%.*]], align 8
; CHECK-NEXT: br i1 [[ALWAYSFALSE:%.*]], label [[NEVERTAKEN:%.*]], label [[ALWAYSTAKEN:%.*]]
; CHECK: neverTaken:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)** [[LOC]] to i64*
; CHECK-NEXT: [[INT:%.*]] = load i64, i64* [[LOC_BC]], align 8
; CHECK-NEXT: ret i64 [[INT]]
; CHECK: alwaysTaken:
; CHECK-NEXT: ret i64 42
;
entry:
store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
neverTaken:
%loc.bc = bitcast i8 addrspace(4)** %loc to i64*
%int = load i64, i64* %loc.bc
ret i64 %int
alwaysTaken:
ret i64 42
}
;; Note: For terseness, we stop using the %alwaysfalse trick for the
;; tests below and just exercise the bits of forwarding logic directly.
; memset intrinsic overload whose destination is an i8 pointer in addrspace(4)
; and whose length is an i64; called by the memset forwarding tests in this file.
declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture, i8, i64, i1) nounwind
; Can't forward as the load might be dead. (Pretend we wrote out the alwaysfalse idiom above.)
; memset fills the 8 bytes at %loc with the byte value 7 and %loc is then
; loaded as an i8 addrspace(4)*. The expected output keeps the load instead of
; replacing it with a value derived from the memset byte pattern.
define i8 addrspace(4)* @neg_forward_memset(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8 7, i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8 7, i64 8, i1 false)
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Vector-load variant of @neg_forward_memset: after a memset of byte value 7,
; %loc is loaded as a <1 x i8 addrspace(4)*>. The expected output keeps the
; vector load.
define <1 x i8 addrspace(4)*> @neg_forward_memset_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memset_vload(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <1 x i8 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8 7, i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret <1 x i8 addrspace(4)*> [[REF]]
;
entry:
%loc.bc = bitcast <1 x i8 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8 7, i64 8, i1 false)
%ref = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* %loc
ret <1 x i8 addrspace(4)*> %ref
}
; Can forward since we can do so w/o breaking types
; Here the memset byte value is 0. The expected output has no load left and
; returns a null i8 addrspace(4)* directly; the memset call itself remains.
define i8 addrspace(4)* @forward_memset_zero(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_memset_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8 0, i64 8, i1 false)
; CHECK-NEXT: ret i8 addrspace(4)* null
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8 0, i64 8, i1 false)
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Can't forward as the load might be dead. (Pretend we wrote out the alwaysfalse idiom above.)
; The i64 constant 5 is stored through a bitcast of %loc, and %loc is then
; loaded as an i8 addrspace(4)*. The expected output keeps the pointer load.
define i8 addrspace(4)* @neg_forward_store(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i64 addrspace(4)*
; CHECK-NEXT: store i64 5, i64 addrspace(4)* [[LOC_BC]], align 8
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i64 addrspace(4)*
store i64 5, i64 addrspace(4)* %loc.bc
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Vector-load variant of @neg_forward_store: the i64 constant 5 is stored and
; the same location is loaded as a <1 x i8 addrspace(4)*>. The expected output
; keeps the vector load.
define <1 x i8 addrspace(4)*> @neg_forward_store_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_store_vload(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <1 x i8 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i64 addrspace(4)*
; CHECK-NEXT: store i64 5, i64 addrspace(4)* [[LOC_BC]], align 8
; CHECK-NEXT: [[REF:%.*]] = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret <1 x i8 addrspace(4)*> [[REF]]
;
entry:
%loc.bc = bitcast <1 x i8 addrspace(4)*> addrspace(4)* %loc to i64 addrspace(4)*
store i64 5, i64 addrspace(4)* %loc.bc
%ref = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* %loc
ret <1 x i8 addrspace(4)*> %ref
}
; Nulls have known bit patterns, so we can forward
; An i64 zero is stored and the location is loaded as an i8 addrspace(4)*.
; The expected output has no load and returns null.
define i8 addrspace(4)* @forward_store_zero(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_store_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i64 addrspace(4)*
; CHECK-NEXT: store i64 0, i64 addrspace(4)* [[LOC_BC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* null
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i64 addrspace(4)*
store i64 0, i64 addrspace(4)* %loc.bc
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Nulls have known bit patterns, so we can forward
; Same as @forward_store_zero, but the zero is written as a
; <2 x i32> zeroinitializer vector store. The expected output returns null.
define i8 addrspace(4)* @forward_store_zero2(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_store_zero2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to <2 x i32> addrspace(4)*
; CHECK-NEXT: store <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* [[LOC_BC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* null
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to <2 x i32> addrspace(4)*
store <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %loc.bc
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Constant memcpy sources used by the memcpy forwarding tests in this file:
;   @NonZeroConstant  - four i64 integers, each 3;
;   @NonZeroConstant2 - four i64 addrspace(4)* constants, each a GEP of
;                       3 elements off null;
;   @ZeroConstant     - an all-zero <4 x i64>.
@NonZeroConstant = constant <4 x i64> <i64 3, i64 3, i64 3, i64 3>
@NonZeroConstant2 = constant <4 x i64 addrspace(4)*> <
i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
@ZeroConstant = constant <4 x i64> zeroinitializer
; Can't forward as the load might be dead. (Pretend we wrote out the alwaysfalse idiom above.)
; Copies 8 bytes from the integer constant @NonZeroConstant over %loc and then
; loads %loc as an i8 addrspace(4)*. The expected output keeps the load.
define i8 addrspace(4)* @neg_forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memcopy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Same as @neg_forward_memcopy, but the loaded type is i64 addrspace(4)*.
; The source is still the integer constant @NonZeroConstant, and the expected
; output keeps the load.
define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memcopy2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret i64 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
ret i64 addrspace(4)* %ref
}
; TODO: missed optimization
; Copies 8 bytes from the pointer constant @NonZeroConstant2 (elements are
; i64 addrspace(4)*) and loads the result as an i8 addrspace(4)*. The expected
; output currently still contains the load.
define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_memcopy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; Copies 8 bytes from the pointer constant @NonZeroConstant2 and loads the
; result as an i64 addrspace(4)*, the same type as the constant's elements.
; The expected output has no load and returns the constant GEP expression.
define i64 addrspace(4)* @forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_memcopy2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
; CHECK-NEXT: ret i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)
;
entry:
%loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
ret i64 addrspace(4)* %ref
}
; Copies 8 bytes from the integer constant @NonZeroConstant and loads the
; destination as a <1 x i8 addrspace(4)*>. The expected output keeps the
; vector load.
define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memcpy_vload(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <1 x i8 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* [[LOC]], align 8
; CHECK-NEXT: ret <1 x i8 addrspace(4)*> [[REF]]
;
entry:
%loc.bc = bitcast <1 x i8 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* %loc
ret <1 x i8 addrspace(4)*> %ref
}
; Copies all 32 bytes of the integer constant @NonZeroConstant and loads the
; destination as a <4 x i64 addrspace(4)*>. The expected output keeps the
; vector-of-pointers load.
define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memcpy_vload2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]], align 32
; CHECK-NEXT: ret <4 x i64 addrspace(4)*> [[REF]]
;
entry:
%loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
%ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
ret <4 x i64 addrspace(4)*> %ref
}
; Opposite direction of @neg_forward_memcpy_vload2: copies 32 bytes from the
; pointer constant @NonZeroConstant2 and loads the destination as a
; <4 x i64> of integers. The expected output keeps the load.
define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) {
; CHECK-LABEL: @neg_forward_memcpy_vload3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
; CHECK-NEXT: [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]], align 32
; CHECK-NEXT: ret <4 x i64> [[REF]]
;
entry:
%loc.bc = bitcast <4 x i64> addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
%ref = load <4 x i64>, <4 x i64> addrspace(4)* %loc
ret <4 x i64> %ref
}
; Copies 32 bytes from the pointer constant @NonZeroConstant2, loads them back
; as a <4 x i64 addrspace(4)*>, extracts lane 0 and inserts it into a
; one-element vector whose placeholder is poison. The expected output has no
; load and returns a constant <1 x i64 addrspace(4)*> holding the GEP expression.
define <1 x i64 addrspace(4)*> @forward_memcpy_vload3(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
; CHECK-LABEL: @forward_memcpy_vload3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
; CHECK-NEXT: ret <1 x i64 addrspace(4)*> <i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
;
entry:
%loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
%ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
%val = extractelement <4 x i64 addrspace(4)*> %ref, i32 0
%ret = insertelement <1 x i64 addrspace(4)*> poison, i64 addrspace(4)* %val, i32 0
ret <1 x i64 addrspace(4)*> %ret
}
; Can forward since we can do so w/o breaking types
; Copies 8 bytes from the all-zero constant @ZeroConstant and loads the
; destination as an i8 addrspace(4)*. The expected output has no load and
; returns null.
define i8 addrspace(4)* @forward_memcpy_zero(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @forward_memcpy_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @ZeroConstant to i8*), i64 8, i1 false)
; CHECK-NEXT: ret i8 addrspace(4)* null
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
%src.bc = bitcast <4 x i64>* @ZeroConstant to i8*
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
ret i8 addrspace(4)* %ref
}
; memcpy intrinsic overload copying from a default-address-space i8* source to
; an addrspace(4) i8* destination with an i64 length; called by the memcpy
; forwarding tests in this file.
declare void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* nocapture, i8* nocapture, i64, i1) nounwind
; Same as the neg_forward_store cases, but for non defs.
; (Pretend we wrote out the alwaysfalse idiom above.)
; A 16-byte <2 x i64> <4, 4> store covers %loc, and the pointer load reads the
; second 8-byte slot (%loc + 1 element), so the store only partially overlaps
; the load. The expected output keeps the pointer load.
define i8 addrspace(4)* @neg_store_clobber(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_store_clobber(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to <2 x i64> addrspace(4)*
; CHECK-NEXT: store <2 x i64> <i64 4, i64 4>, <2 x i64> addrspace(4)* [[LOC_BC]], align 16
; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], i64 1
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC_OFF]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to <2 x i64> addrspace(4)*
store <2 x i64> <i64 4, i64 4>, <2 x i64> addrspace(4)* %loc.bc
%loc.off = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc, i64 1
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
ret i8 addrspace(4)* %ref
}
; External sink for a <2 x i64> value, called by @neg_load_clobber. It carries
; the inaccessiblememonly attribute (see the LLVM LangRef for its meaning).
declare void @use(<2 x i64>) inaccessiblememonly
; Same as the neg_forward_store cases, but for non defs.
; (Pretend we wrote out the alwaysfalse idiom above.)
; A 16-byte <2 x i64> integer load from %loc is passed to @use, and a pointer
; is then loaded from the second 8-byte slot. The expected output keeps both
; loads, so the pointer is not rebuilt from the earlier integer vector.
define i8 addrspace(4)* @neg_load_clobber(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @neg_load_clobber(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to <2 x i64> addrspace(4)*
; CHECK-NEXT: [[V:%.*]] = load <2 x i64>, <2 x i64> addrspace(4)* [[LOC_BC]], align 16
; CHECK-NEXT: call void @use(<2 x i64> [[V]])
; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], i64 1
; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC_OFF]], align 8
; CHECK-NEXT: ret i8 addrspace(4)* [[REF]]
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to <2 x i64> addrspace(4)*
%v = load <2 x i64>, <2 x i64> addrspace(4)* %loc.bc
call void @use(<2 x i64> %v)
%loc.off = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc, i64 1
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
ret i8 addrspace(4)* %ref
}
; Zero-valued counterpart of @neg_store_clobber: the overlapping 16-byte store
; writes zeroinitializer. The expected output has no load and returns null;
; the now-unused GEP is still present in the expected output.
define i8 addrspace(4)* @store_clobber_zero(i8 addrspace(4)* addrspace(4)* %loc) {
; CHECK-LABEL: @store_clobber_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to <2 x i64> addrspace(4)*
; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64> addrspace(4)* [[LOC_BC]], align 16
; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], i64 1
; CHECK-NEXT: ret i8 addrspace(4)* null
;
entry:
%loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to <2 x i64> addrspace(4)*
store <2 x i64> zeroinitializer, <2 x i64> addrspace(4)* %loc.bc
%loc.off = getelementptr i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc, i64 1
%ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
ret i8 addrspace(4)* %ref
}
; Loads a <4 x i64 addrspace(4)*> and then a <2 x i64 addrspace(4)*> from the
; same address %p. The expected output keeps both loads; the narrower vector
; is not derived from the wider one.
define void @smaller_vector(i8* %p) {
; CHECK-LABEL: @smaller_vector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[P]] to <2 x i64 addrspace(4)*>*
; CHECK-NEXT: [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
; CHECK-NEXT: [[V2:%.*]] = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* [[B]], align 32
; CHECK-NEXT: call void @use.v2(<2 x i64 addrspace(4)*> [[V2]])
; CHECK-NEXT: call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
; CHECK-NEXT: ret void
;
entry:
%a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
%b = bitcast i8* %p to <2 x i64 addrspace(4)*>*
%v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
%v2 = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* %b, align 32
call void @use.v2(<2 x i64 addrspace(4)*> %v2)
call void @use.v4(<4 x i64 addrspace(4)*> %v4)
ret void
}
; Loads a <4 x i64 addrspace(4)*> and then a single i64 addrspace(4)* from the
; same address %p. The expected output keeps both loads; the scalar is not
; replaced by an extract from the vector.
define i64 addrspace(4)* @vector_extract(i8* %p) {
; CHECK-LABEL: @vector_extract(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[P]] to i64 addrspace(4)**
; CHECK-NEXT: [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
; CHECK-NEXT: [[RES:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[B]], align 32
; CHECK-NEXT: call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
; CHECK-NEXT: ret i64 addrspace(4)* [[RES]]
;
entry:
%a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
%b = bitcast i8* %p to i64 addrspace(4)**
%v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
%res = load i64 addrspace(4)*, i64 addrspace(4)** %b, align 32
call void @use.v4(<4 x i64 addrspace(4)*> %v4)
ret i64 addrspace(4)* %res
}
; External sinks that keep the vector loads in @smaller_vector and
; @vector_extract live.
declare void @use.v2(<2 x i64 addrspace(4)*>)
declare void @use.v4(<4 x i64 addrspace(4)*>)
; Stores an i8 addrspace(4)* to %loc and, on the %neverTaken path, reloads the
; same address as an i8 addrspace(5)*. Both address spaces appear under "ni:"
; in this file's datalayout. The expected output keeps the load, so the value
; is not forwarded from one non-integral address space to the other.
define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
; CHECK-LABEL: @multini(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8 addrspace(4)* [[VAL:%.*]], i8 addrspace(4)** [[LOC:%.*]], align 8
; CHECK-NEXT: br i1 [[ALWAYSFALSE:%.*]], label [[NEVERTAKEN:%.*]], label [[ALWAYSTAKEN:%.*]]
; CHECK: neverTaken:
; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)** [[LOC]] to i8 addrspace(5)**
; CHECK-NEXT: [[DIFFERENTAS:%.*]] = load i8 addrspace(5)*, i8 addrspace(5)** [[LOC_BC]], align 8
; CHECK-NEXT: ret i8 addrspace(5)* [[DIFFERENTAS]]
; CHECK: alwaysTaken:
; CHECK-NEXT: ret i8 addrspace(5)* null
;
entry:
store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
neverTaken:
%loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
%differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
ret i8 addrspace(5)* %differentas
alwaysTaken:
ret i8 addrspace(5)* null
}

View File

@ -0,0 +1,143 @@
; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Regression tests from old HSAIL addrspacecast optimization pass
@data = internal addrspace(1) global [100 x double] [double 0.00, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01, double 6.000000e-01, double 7.000000e-01, double 8.000000e-01, double 9.000000e-01, double 1.00, double 1.10, double 1.20, double 1.30, double 1.40, double 1.50, double 1.60, double 1.70, double 1.80, double 1.90, double 2.00, double 2.10, double 2.20, double 2.30, double 2.40, double 2.50, double 2.60, double 2.70, double 2.80, double 2.90, double 3.00, double 3.10, double 3.20, double 3.30, double 3.40, double 3.50, double 3.60, double 3.70, double 3.80, double 3.90, double 4.00, double 4.10, double 4.20, double 4.30, double 4.40, double 4.50, double 4.60, double 4.70, double 4.80, double 4.90, double 5.00, double 5.10, double 5.20, double 5.30, double 5.40, double 5.50, double 5.60, double 5.70, double 5.80, double 5.90, double 6.00, double 6.10, double 6.20, double 6.30, double 6.40, double 6.50, double 6.60, double 6.70, double 6.80, double 6.90, double 7.00, double 7.10, double 7.20, double 7.30, double 7.40, double 7.50, double 7.60, double 7.70, double 7.80, double 7.90, double 8.00, double 8.10, double 8.20, double 8.30, double 8.40, double 8.50, double 8.60, double 8.70, double 8.80, double 8.90, double 9.00, double 9.10, double 9.20, double 9.30, double 9.40, double 9.50, double 9.60, double 9.70, double 9.80, double 9.90], align 8
; The input loads through a constant expression that addrspacecasts @data
; (addrspace(1)) to a flat pointer. The load is expected to be rewritten to
; use addrspace(1) directly, with the addrspacecast gone, rather than staying
; a flat load.
; CHECK-LABEL: @generic_address_bitcast_const(
; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8
define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
%tmp5 = extractelement <2 x i64> %sext, i64 1
%tmp6 = and i64 %tmp4, %tmp5
%tmp7 = lshr i64 %tmp6, 63
%tmp8 = trunc i64 %tmp7 to i32
%idxprom = and i64 %tmp3, 4294967295
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %idxprom
store i32 %tmp8, i32 addrspace(1)* %arrayidx, align 4
ret void
}
; Float global in addrspace(1) that @generic_address_bug9749 stores to and
; loads from.
@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4
; External function taking a flat i8*; called by @generic_address_bug9749.
declare i32 @_Z9get_fencePv(i8*)
; Opaque pointee type of %in_pipe in @generic_address_pipe_bug9673.
%opencl.pipe_t = type opaque
; This is a compile time assert bug, but we still want to check optimization
; is performed to generate ld_global.
; NOTE(review): the load expected below is on addrspace(3), not addrspace(1),
; so "ld_global" looks like leftover wording from the original HSAIL test.
; The input already uses addrspace(3) pointers throughout, and the bitcast,
; GEP and load are expected to appear unchanged.
; CHECK-LABEL: @generic_address_pipe_bug9673(
; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
%add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
%tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %dst, i32 %tmp
store i32 %tmp2, i32 addrspace(1)* %arrayidx, align 4
ret void
}
; Should generate flat load
; The flat address of the addrspace(1) global is written to a stack slot with
; a volatile store and read back with a volatile load, so %tmp2 is only known
; as a flat float*. The load through %tmp2 in %if.end.i is therefore expected
; to remain a flat `load float, float*`.
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
; CHECK: load float, float*
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
entry:
%ptr = alloca float*, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
%tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
%tmp4 = bitcast float* %tmp2 to i8*
%call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit
if.end.i: ; preds = %entry
%tmp5 = load float, float* %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit
helperFunction.exit: ; preds = %if.end.i, %entry
%retval.0.i = phi i32 [ 0, %entry ], [ %phitmp, %if.end.i ]
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %tmp1
store i32 %retval.0.i, i32 addrspace(1)* %arrayidx, align 4
ret void
}
; %in (addrspace(3)) is addrspacecast to a flat pointer that feeds a pointer
; phi advanced by a GEP on every loop iteration. The phi is expected to be
; retyped to i32 addrspace(3)*, and the store of %i.03 to go through an
; addrspace(3) pointer.
; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel(
; CHECK: phi i32 addrspace(3)*
; CHECK: store i32 %i.03, i32 addrspace(3)* %
define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 {
entry:
%cmp1 = icmp eq i32 %numElems, 0
br i1 %cmp1, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%tmp = addrspacecast i32 addrspace(3)* %in to i32*
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
store i32 %i.03, i32* %ptr.02, align 4
%add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Both addrspace(3) arguments are addrspacecast to flat pointers, indexed with
; a GEP and bitcast to <2 x i32>* for a vector load and a vector store. The
; stored vector is built with an insertelement chain whose placeholder is
; poison. The vector load and the store of %tmp16 are both expected to use
; <2 x i32> addrspace(3)* pointers.
; CHECK-LABEL: @generic_address_bug9899(
; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)*
; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)*
define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
%tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
%tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp6 = ashr exact i64 %sext, 31
%tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
%arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
%vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
%tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> poison, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
%arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
ret void
}
; Work-item id intrinsic called by the kernels in this file (attribute group #2).
declare i32 @llvm.amdgcn.workitem.id.x() #2
; #0 is attached to the kernels in this file, #1 to the @_Z9get_fencePv call
; site, and #2 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }

View File

@ -0,0 +1,357 @@
; RUN: opt < %s -inferattrs -S | FileCheck %s
; Determine dereference-ability before unused loads get deleted:
; https://bugs.llvm.org/show_bug.cgi?id=21780
; Loads four consecutive doubles starting at %ptr (elements 0..3), assembles
; them into a <4 x double> starting from a poison placeholder, and returns a
; shuffle that only reads lanes 0 and 2. The expected signature carries no
; added attribute on %ptr.
define <4 x double> @PR21780(double* %ptr) {
; CHECK-LABEL: @PR21780(double* %ptr)
; GEP of index 0 is simplified away.
%arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1
%arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
%arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3
%t0 = load double, double* %ptr, align 8
%t1 = load double, double* %arrayidx1, align 8
%t2 = load double, double* %arrayidx2, align 8
%t3 = load double, double* %arrayidx3, align 8
%vecinit0 = insertelement <4 x double> poison, double %t0, i32 0
%vecinit1 = insertelement <4 x double> %vecinit0, double %t1, i32 1
%vecinit2 = insertelement <4 x double> %vecinit1, double %t2, i32 2
%vecinit3 = insertelement <4 x double> %vecinit2, double %t3, i32 3
%shuffle = shufflevector <4 x double> %vecinit3, <4 x double> %vecinit3, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %shuffle
}
; Only element 3 is loaded, through an inbounds GEP; %ptr itself is never
; accessed. The expected signature leaves %ptr without any attribute.
define double @PR21780_only_access3_with_inbounds(double* %ptr) {
; CHECK-LABEL: @PR21780_only_access3_with_inbounds(double* %ptr)
%arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Same as the variant above, but the GEP to element 3 has no inbounds flag.
; The expected signature leaves %ptr without any attribute.
define double @PR21780_only_access3_without_inbounds(double* %ptr) {
; CHECK-LABEL: @PR21780_only_access3_without_inbounds(double* %ptr)
%arrayidx3 = getelementptr double, double* %ptr, i64 3
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Loads elements 0..3 through GEPs without the inbounds flag; only the last
; loaded value is returned, the other three loads are unused. The expected
; signature leaves %ptr without any attribute.
define double @PR21780_without_inbounds(double* %ptr) {
; CHECK-LABEL: @PR21780_without_inbounds(double* %ptr)
%arrayidx1 = getelementptr double, double* %ptr, i64 1
%arrayidx2 = getelementptr double, double* %ptr, i64 2
%arrayidx3 = getelementptr double, double* %ptr, i64 3
%t0 = load double, double* %ptr, align 8
%t1 = load double, double* %arrayidx1, align 8
%t2 = load double, double* %arrayidx2, align 8
%t3 = load double, double* %arrayidx3, align 8
ret double %t3
}
; Unsimplified, but still valid. Also, throw in some bogus arguments.
; Bytes 0..2 of %ptr are loaded (including an explicit GEP with index 0);
; %unused is never touched and %other is only the target of a single store.
; The expected signature leaves all three arguments without attributes.
define void @gep0(i8* %unused, i8* %other, i8* %ptr) {
; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* %ptr)
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
store i8 %t2, i8* %other
ret void
}
; Order of accesses does not change computation.
; Multiple arguments may be dereferenceable.
; Loads of bytes 0..2 of %ptr1 and of i32 elements 0..1 of %ptr2 are
; interleaved and issued out of offset order. The expected signature leaves
; both arguments without attributes.
define void @ordering(i8* %ptr1, i32* %ptr2) {
; CHECK-LABEL: @ordering(i8* %ptr1, i32* %ptr2)
%a20 = getelementptr i32, i32* %ptr2, i64 0
%a12 = getelementptr i8, i8* %ptr1, i64 2
%t12 = load i8, i8* %a12
%a11 = getelementptr i8, i8* %ptr1, i64 1
%t20 = load i32, i32* %a20
%a10 = getelementptr i8, i8* %ptr1, i64 0
%t10 = load i8, i8* %a10
%t11 = load i8, i8* %a11
%a21 = getelementptr i32, i32* %ptr2, i64 1
%t21 = load i32, i32* %a21
ret void
}
; Not in entry block.
; The entry block branches unconditionally to %exit, where bytes 0..2 of %ptr
; are loaded. The expected signature leaves %ptr without any attribute.
define void @not_entry_but_guaranteed_to_execute(i8* %ptr) {
; CHECK-LABEL: @not_entry_but_guaranteed_to_execute(i8* %ptr)
entry:
br label %exit
exit:
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
}
; Not in entry block and not guaranteed to execute.
; The three loads live in %loads, which is only reached when %cond is true;
; otherwise the function returns from %exit without touching %ptr. The
; expected signature leaves %ptr without any attribute.
define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) {
; CHECK-LABEL: @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond)
entry:
br i1 %cond, label %loads, label %exit
loads:
%arrayidx0 = getelementptr i8, i8* %ptr, i64 0
%arrayidx1 = getelementptr i8, i8* %ptr, i64 1
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %arrayidx0
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
exit:
ret void
}
; The last load may not execute, so dereferenceable bytes only cover the first two loads.
; Elements 0 and 1 are loaded in the entry block; element 2 is loaded only in
; %loads, which is reached when %cond is true. The expected signature leaves
; %ptr without any attribute.
define void @partial_in_entry(i16* %ptr, i1 %cond) {
; CHECK-LABEL: @partial_in_entry(i16* %ptr, i1 %cond)
entry:
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load i16, i16* %arrayidx0
%t1 = load i16, i16* %arrayidx1
br i1 %cond, label %loads, label %exit
loads:
%t2 = load i16, i16* %arrayidx2
ret void
exit:
ret void
}
; The volatile load can't be used to prove a non-volatile access is allowed.
; The 2nd and 3rd loads may never execute.
; Element 0 is read with a volatile load, elements 1 and 2 with ordinary
; loads. The expected signature leaves %ptr without any attribute.
define void @volatile_is_not_dereferenceable(i16* %ptr) {
; CHECK-LABEL: @volatile_is_not_dereferenceable(i16* %ptr)
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load volatile i16, i16* %arrayidx0
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; TODO: We should allow inference for atomic (but not volatile) ops.
; Element 0 is read with an unordered atomic load, elements 1 and 2 with
; ordinary loads. The expected signature currently leaves %ptr without any
; attribute.
define void @atomic_is_alright(i16* %ptr) {
; CHECK-LABEL: @atomic_is_alright(i16* %ptr)
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load atomic i16, i16* %arrayidx0 unordered, align 2
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; External callee with no attributes; as its name says, it may not return.
declare void @may_not_return()
; Element 0 is loaded before the call to @may_not_return, elements 1 and 2
; only after it. The expected signature leaves %ptr without any attribute.
define void @not_guaranteed_to_transfer_execution(i16* %ptr) {
; CHECK-LABEL: @not_guaranteed_to_transfer_execution(i16* %ptr)
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t0 = load i16, i16* %arrayidx0
call void @may_not_return()
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; We must have consecutive accesses.
; Byte 0 and byte 2 are loaded at constant offsets, but the middle access
; uses the run-time index %variable_index. The expected signature leaves all
; arguments without attributes.
define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) {
; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index)
%arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index
%arrayidx2 = getelementptr i8, i8* %ptr, i64 2
%t0 = load i8, i8* %ptr
%t1 = load i8, i8* %arrayidx1
%t2 = load i8, i8* %arrayidx2
ret void
}
; Deal with >1 GEP index.
; A two-index GEP (0, 0) selects the first i8 of the <4 x i8> pointed to by
; %ptr, and that single byte is loaded. The expected signature currently
; leaves %ptr without any attribute (see the FIXME below).
define void @multi_index_gep(<4 x i8>* %ptr) {
; CHECK-LABEL: @multi_index_gep(<4 x i8>* %ptr)
; FIXME: %ptr should be dereferenceable(4)
%arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0
%t0 = load i8, i8* %arrayidx00
ret void
}
; Could round weird bitwidths down?
; Loads a single i9, a type whose width is not a multiple of 8 bits. The
; expected signature leaves %ptr without any attribute.
define void @not_byte_multiple(i9* %ptr) {
; CHECK-LABEL: @not_byte_multiple(i9* %ptr)
%arrayidx0 = getelementptr i9, i9* %ptr, i64 0
%t0 = load i9, i9* %arrayidx0
ret void
}
; Missing direct access from the pointer.
; Only elements 1 and 2 are loaded; element 0 (the address %ptr itself) is
; never accessed. The expected signature leaves %ptr without any attribute.
define void @no_pointer_deref(i16* %ptr) {
; CHECK-LABEL: @no_pointer_deref(i16* %ptr)
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t1 = load i16, i16* %arrayidx1
%t2 = load i16, i16* %arrayidx2
ret void
}
; Out-of-order is ok, but missing access concludes dereferenceable range.
; Elements 1, 0 and 3 are loaded in that order; element 2 is never accessed.
; The expected signature leaves %ptr without any attribute.
define void @non_consecutive(i32* %ptr) {
; CHECK-LABEL: @non_consecutive(i32* %ptr)
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%t1 = load i32, i32* %arrayidx1
%t0 = load i32, i32* %arrayidx0
%t3 = load i32, i32* %arrayidx3
ret void
}
; Improve on existing dereferenceable attribute.
; %ptr is declared dereferenceable(8) while i32 elements 0..3 (16 bytes) are
; all loaded.
; NOTE(review): the expected signature keeps dereferenceable(8) unchanged,
; i.e. no improvement is currently expected from this RUN line - confirm this
; is intentional.
define void @more_bytes(i32* dereferenceable(8) %ptr) {
; CHECK-LABEL: @more_bytes(i32* dereferenceable(8) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; Improve on existing dereferenceable_or_null attribute.
; %ptr is declared dereferenceable_or_null(8) while i32 elements 0..3
; (16 bytes) are all loaded.
; NOTE(review): the expected signature keeps dereferenceable_or_null(8)
; unchanged, i.e. no improvement is currently expected from this RUN line -
; confirm this is intentional.
define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) {
; CHECK-LABEL: @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; But don't pessimize existing dereferenceable attribute.
; %ptr is declared dereferenceable(100), more than the 16 bytes covered by
; the four i32 loads. The expected signature keeps dereferenceable(100).
define void @better_bytes(i32* dereferenceable(100) %ptr) {
; CHECK-LABEL: @better_bytes(i32* dereferenceable(100) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx2 = getelementptr i32, i32* %ptr, i64 2
%t3 = load i32, i32* %arrayidx3
%t1 = load i32, i32* %arrayidx1
%t2 = load i32, i32* %arrayidx2
%t0 = load i32, i32* %arrayidx0
ret void
}
; The i32* argument is bitcast to float* and float elements 0 and 1 are
; loaded through the cast pointer. The expected signature leaves %arg without
; any attribute.
define void @bitcast(i32* %arg) {
; CHECK-LABEL: @bitcast(i32* %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
%t0 = load float, float* %arrayidx0
%t1 = load float, float* %arrayidx1
ret void
}
; Each argument is accessed through a bitcast to a type of a different size:
; %arg1 (double*) is read as float elements 0..2, and %arg2 (i8*) is read as
; i64 elements 0..1. The expected signature leaves both arguments without
; attributes.
define void @bitcast_different_sizes(double* %arg1, i8* %arg2) {
; CHECK-LABEL: @bitcast_different_sizes(double* %arg1, i8* %arg2)
%ptr1 = bitcast double* %arg1 to float*
%a10 = getelementptr float, float* %ptr1, i64 0
%a11 = getelementptr float, float* %ptr1, i64 1
%a12 = getelementptr float, float* %ptr1, i64 2
%ld10 = load float, float* %a10
%ld11 = load float, float* %a11
%ld12 = load float, float* %a12
%ptr2 = bitcast i8* %arg2 to i64*
%a20 = getelementptr i64, i64* %ptr2, i64 0
%a21 = getelementptr i64, i64* %ptr2, i64 1
%ld20 = load i64, i64* %a20
%ld21 = load i64, i64* %a21
ret void
}
; Loads float element 0 and float element -1 (one element before the pointer)
; through a bitcast of %arg. The expected signature leaves %arg without any
; attribute.
define void @negative_offset(i32* %arg) {
; CHECK-LABEL: @negative_offset(i32* %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 -1
%t0 = load float, float* %arrayidx0
%t1 = load float, float* %arrayidx1
ret void
}
; Accesses are stores rather than loads: float elements 0 and 1 are written
; through a bitcast of %arg. The expected signature leaves %arg without any
; attribute.
define void @stores(i32* %arg) {
; CHECK-LABEL: @stores(i32* %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
store float 1.0, float* %arrayidx0
store float 2.0, float* %arrayidx1
ret void
}
; Mixed accesses: float element 0 is loaded and float element 1 is stored,
; both through a bitcast of %arg. The expected signature leaves %arg without
; any attribute.
define void @load_store(i32* %arg) {
; CHECK-LABEL: @load_store(i32* %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
%t1 = load float, float* %arrayidx0
store float 2.0, float* %arrayidx1
ret void
}
; Two stores of different sizes to the same address, wider one first: a
; double through a bitcast of %arg, then an i32 through %arg itself. The
; expected signature leaves %arg without any attribute.
define void @different_size1(i32* %arg) {
; CHECK-LABEL: @different_size1(i32* %arg)
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
store i32 0, i32* %arg
ret void
}
; Same two stores as @different_size1 but in the opposite order: the i32
; store through %arg comes first, then the double store through the bitcast.
; The expected signature leaves %arg without any attribute.
define void @different_size2(i32* %arg) {
; CHECK-LABEL: @different_size2(i32* %arg)
store i32 0, i32* %arg
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
ret void
}

View File

@ -0,0 +1,13 @@
; RUN: opt -instcombine -mtriple=aarch64-linux-gnu -mattr=+sve -S < %s | FileCheck %s
; We shouldn't fold bitcast(insert <vscale x 1 x iX> .., iX %val, i32 0)
; into bitcast(iX %val) for scalable vectors.
; Here %val is inserted into lane 0 of a poison <vscale x 1 x i16> and the
; vector is bitcast to <vscale x 2 x i8>. The expectations require the
; vector-to-vector bitcast of %op2 to survive and forbid a direct bitcast of
; the scalar %val to the scalable result type.
define <vscale x 2 x i8> @bitcast_of_insert_i8_i16(i16 %val) #0 {
; CHECK-LABEL: @bitcast_of_insert_i8_i16(
; CHECK-NOT: bitcast i16 %val to <vscale x 2 x i8>
; CHECK: bitcast <vscale x 1 x i16> %op2 to <vscale x 2 x i8>
entry:
%op2 = insertelement <vscale x 1 x i16> poison, i16 %val, i32 0
%0 = bitcast <vscale x 1 x i16> %op2 to <vscale x 2 x i8>
ret <vscale x 2 x i8> %0
}

View File

@ -0,0 +1,194 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; x86 intrinsics called by the tests below: the 128-bit and 256-bit addsub
; variants for double and float, plus the scalar-double compare used by the
; PR48476 tests.
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>)
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>)
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg) #0
;
; Demanded Elts
;
; Only lane 0 of the addsub result is extracted. The first operand is a splat
; of element 0 of %0 and the second a splat of element 1 of %1. The expected
; output replaces the intrinsic with a vector fsub and keeps only a reduced
; shuffle (mask <1, undef>) on the second operand.
define double @elts_addsub_v2f64(<2 x double> %0, <2 x double> %1) {
; CHECK-LABEL: @elts_addsub_v2f64(
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP0:%.*]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
; CHECK-NEXT: ret double [[TMP5]]
;
%3 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%4 = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
%5 = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %3, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Only lane 0 of the addsub result is extracted and both operands are splats
; of element 0. The expected output is a plain vector fsub on the original
; arguments with both shuffles removed.
define double @elts_addsub_v2f64_sub(<2 x double> %0, <2 x double> %1) {
; CHECK-LABEL: @elts_addsub_v2f64_sub(
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP0:%.*]], [[TMP1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%3 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%4 = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%5 = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %3, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Lanes 0 and 1 of the addsub result are extracted and summed. The expected
; output keeps the intrinsic call but applies it directly to the original
; arguments; both input shuffles (mask <0, 1, 0, 1>) are removed.
define float @elts_addsub_v4f32(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: @elts_addsub_v4f32(
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret float [[TMP6]]
;
%3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%4 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%5 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %3, <4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
%7 = extractelement <4 x float> %5, i32 1
%8 = fadd float %6, %7
ret float %8
}
; Lanes 1 and 3 of the addsub result are extracted and summed. The expected
; output is a plain vector fadd on the original arguments. Because both input
; shuffles use mask <0, 1, 0, 1>, lane 3 reads source element 1, so once the
; shuffles are removed both extracts are expected at index 1.
define float @elts_addsub_v4f32_add(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: @elts_addsub_v4f32_add(
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP0:%.*]], [[TMP1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret float [[TMP6]]
;
%3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%4 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%5 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %3, <4 x float> %4)
%6 = extractelement <4 x float> %5, i32 1
%7 = extractelement <4 x float> %5, i32 3
%8 = fadd float %6, %7
ret float %8
}
; Lanes 0 and 1 of the 256-bit addsub result are extracted and summed. The
; expected output keeps the intrinsic call on the original arguments and
; removes both input shuffles (mask <0, 1, 3, 3>).
define double @elts_addsub_v4f64(<4 x double> %0, <4 x double> %1) {
; CHECK-LABEL: @elts_addsub_v4f64(
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret double [[TMP6]]
;
%3 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
%4 = shufflevector <4 x double> %1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
%5 = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %3, <4 x double> %4)
%6 = extractelement <4 x double> %5, i32 0
%7 = extractelement <4 x double> %5, i32 1
%8 = fadd double %6, %7
ret double %8
}
; Lanes 1 and 3 of the 256-bit addsub result are extracted and summed. The
; expected output is a plain vector fadd on the original arguments; the
; shuffle mask <0, 1, 3, 3> maps lanes 1 and 3 to themselves, so the extract
; indices stay 1 and 3.
define double @elts_addsub_v4f64_add(<4 x double> %0, <4 x double> %1) {
; CHECK-LABEL: @elts_addsub_v4f64_add(
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP0:%.*]], [[TMP1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP3]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret double [[TMP6]]
;
%3 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
%4 = shufflevector <4 x double> %1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
%5 = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %3, <4 x double> %4)
%6 = extractelement <4 x double> %5, i32 1
%7 = extractelement <4 x double> %5, i32 3
%8 = fadd double %6, %7
ret double %8
}
; Lanes 0 and 1 of the 256-bit float addsub result are extracted and summed.
; The expected output keeps the intrinsic call on the original arguments and
; removes both input shuffles.
define float @elts_addsub_v8f32(<8 x float> %0, <8 x float> %1) {
; CHECK-LABEL: @elts_addsub_v8f32(
; CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> [[TMP0:%.*]], <8 x float> [[TMP1:%.*]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret float [[TMP6]]
;
%3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4>
%4 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4>
%5 = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %3, <8 x float> %4)
%6 = extractelement <8 x float> %5, i32 0
%7 = extractelement <8 x float> %5, i32 1
%8 = fadd float %6, %7
ret float %8
}
; Lanes 0 and 4 of the 256-bit float addsub result are extracted and summed.
; The expected output is a plain vector fsub on the original arguments; the
; shuffle mask maps lanes 0 and 4 to themselves, so the extract indices stay
; 0 and 4.
define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) {
; CHECK-LABEL: @elts_addsub_v8f32_sub(
; CHECK-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP0:%.*]], [[TMP1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP3]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret float [[TMP6]]
;
%3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4>
%4 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4>
%5 = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %3, <8 x float> %4)
%6 = extractelement <8 x float> %5, i32 0
%7 = extractelement <8 x float> %5, i32 4
%8 = fadd float %6, %7
ret float %8
}
; A <4 x float> is built from four scalars on top of a poison placeholder and
; fed to addsub, but only lanes 0 and 1 of the result are stored. The expected
; output keeps the intrinsic call and the insertelements for lanes 0 and 1,
; and drops the insertelements for lanes 2 and 3 (scalars %2 and %3).
define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, float* %5) {
; CHECK-LABEL: @PR46277(
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5:%.*]], i64 1
; CHECK-NEXT: store float [[TMP10]], float* [[TMP5]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
; CHECK-NEXT: store float [[TMP12]], float* [[TMP11]], align 4
; CHECK-NEXT: ret void
;
%7 = insertelement <4 x float> poison, float %0, i32 0
%8 = insertelement <4 x float> %7, float %1, i32 1
%9 = insertelement <4 x float> %8, float %2, i32 2
%10 = insertelement <4 x float> %9, float %3, i32 3
%11 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %10, <4 x float> %4)
%12 = extractelement <4 x float> %11, i32 0
%13 = getelementptr inbounds float, float* %5, i64 1
store float %12, float* %5, align 4
%14 = extractelement <4 x float> %11, i32 1
store float %14, float* %13, align 4
ret void
}
; addsub(zeroinitializer, %x) feeds the scalar-double compare intrinsic and
; only lane 0 of the compare result is returned. The expected output turns the
; addsub into a vector fsub whose constant operand is <0.0, undef>, and keeps
; the compare call.
define double @PR48476_fsub(<2 x double> %x) {
; CHECK-LABEL: @PR48476_fsub(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> <double 0.000000e+00, double undef>, [[X:%.*]]
; CHECK-NEXT: [[T2:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[X]], i8 6)
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[T2]], i32 0
; CHECK-NEXT: ret double [[VECEXT]]
;
%t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x)
%t2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %t1, <2 x double> %x, i8 6)
%vecext = extractelement <2 x double> %t2, i32 0
ret double %vecext
}
; Two chained addsub calls with a lane swap in between; only lane 0 of the
; second result is returned. The expected output turns the first call into a
; vector fadd with constant <undef, 0.0>, reduces the swap to mask
; <1, undef>, and turns the second call into a vector fsub.
define double @PR48476_fadd_fsub(<2 x double> %x) {
; CHECK-LABEL: @PR48476_fadd_fsub(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]]
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[VECEXT]]
;
%t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x)
%s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
%t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x double> %x)
%vecext = extractelement <2 x double> %t2, i32 0
ret double %vecext
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,635 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
;
; UNDEF Elts
;
; Both operands of the 128-bit packssdw call are undef; the call is expected
; to fold to an undef result.
define <8 x i16> @undef_packssdw_128() {
; CHECK-LABEL: @undef_packssdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
%1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
ret <8 x i16> %1
}
; Both operands of the 128-bit packusdw call are undef; the call is expected
; to fold to an undef result.
define <8 x i16> @undef_packusdw_128() {
; CHECK-LABEL: @undef_packusdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
%1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
ret <8 x i16> %1
}
; Both operands of the 128-bit packsswb call are undef; the call is expected
; to fold to an undef result.
define <16 x i8> @undef_packsswb_128() {
; CHECK-LABEL: @undef_packsswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
ret <16 x i8> %1
}
; Both operands of the 128-bit packuswb call are undef; the call is expected
; to fold to an undef result.
define <16 x i8> @undef_packuswb_128() {
; CHECK-LABEL: @undef_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
ret <16 x i8> %1
}
; Both operands of the 256-bit packssdw call are undef; the call is expected
; to fold to an undef result.
define <16 x i16> @undef_packssdw_256() {
; CHECK-LABEL: @undef_packssdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
%1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
ret <16 x i16> %1
}
; Both operands of the 256-bit packusdw call are undef; the call is expected
; to fold to an undef result.
define <16 x i16> @undef_packusdw_256() {
; CHECK-LABEL: @undef_packusdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
%1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
ret <16 x i16> %1
}
; Both operands of the 256-bit packsswb call are undef; the call is expected
; to fold to an undef result.
define <32 x i8> @undef_packsswb_256() {
; CHECK-LABEL: @undef_packsswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
ret <32 x i8> %1
}
; Both operands of the 256-bit packuswb call are undef; the call is expected
; to fold to an undef result.
define <32 x i8> @undef_packuswb_256() {
; CHECK-LABEL: @undef_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
ret <32 x i8> %1
}
; Both operands of the 512-bit packssdw call are undef; the call is expected
; to fold to an undef result.
define <32 x i16> @undef_packssdw_512() {
; CHECK-LABEL: @undef_packssdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
%1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
ret <32 x i16> %1
}
; Both operands of the 512-bit packusdw call are undef; the call is expected
; to fold to an undef result.
define <32 x i16> @undef_packusdw_512() {
; CHECK-LABEL: @undef_packusdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
%1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
ret <32 x i16> %1
}
; Both operands of the 512-bit packsswb call are undef; the call is expected
; to fold to an undef result.
define <64 x i8> @undef_packsswb_512() {
; CHECK-LABEL: @undef_packsswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
ret <64 x i8> %1
}
; Both operands of the 512-bit packuswb call are undef; the call is expected
; to fold to an undef result.
define <64 x i8> @undef_packuswb_512() {
; CHECK-LABEL: @undef_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
ret <64 x i8> %1
}
;
; Constant Folding
;
; Constant operands: the call is expected to fold to a constant vector. In the
; expected result the out-of-range inputs 65536 and -131072 become 32767 and
; -32768, and the zero second operand fills the upper four lanes.
define <8 x i16> @fold_packssdw_128() {
; CHECK-LABEL: @fold_packssdw_128(
; CHECK-NEXT: ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
;
%1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
ret <8 x i16> %1
}
; First operand undef, second operand constant: the expected result has undef
; in the lower four lanes and folded constants in the upper four. The negative
; input -1 becomes 0, and 65537 becomes all-ones (printed as i16 -1).
define <8 x i16> @fold_packusdw_128() {
; CHECK-LABEL: @fold_packusdw_128(
; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
;
%1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
ret <8 x i16> %1
}
; First operand zero, second operand undef: the expected result has zeros in
; the lower eight lanes and undef in the upper eight.
define <16 x i8> @fold_packsswb_128() {
; CHECK-LABEL: @fold_packsswb_128(
; CHECK-NEXT: ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
;
%1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
ret <16 x i8> %1
}
; Both operands are constants; the call is expected to fold to a constant
; vector. In the expected result negative i16 inputs become 0 and 255 becomes
; all-ones (printed as i8 -1).
define <16 x i8> @fold_packuswb_128() {
; CHECK-LABEL: @fold_packuswb_128(
; CHECK-NEXT: ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
;
%1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
ret <16 x i8> %1
}
; First operand constant, second operand undef: the expected result
; alternates groups of four folded constants (from the first operand) with
; groups of four undef lanes (from the second operand).
define <16 x i16> @fold_packssdw_256() {
; CHECK-LABEL: @fold_packssdw_256(
; CHECK-NEXT: ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
%1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
ret <16 x i16> %1
}
; Both operands are constants; the call is expected to fold to a constant
; vector that alternates groups of four lanes taken from the first operand
; and four lanes taken from the second operand.
define <16 x i16> @fold_packusdw_256() {
; CHECK-LABEL: @fold_packusdw_256(
; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
%1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
ret <16 x i16> %1
}
; First operand undef, second operand zero: the expected result alternates
; groups of eight undef lanes with groups of eight zero lanes.
define <32 x i8> @fold_packsswb_256() {
; CHECK-LABEL: @fold_packsswb_256(
; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
ret <32 x i8> %1
}
; First operand zero, second operand constant: the call is expected to fold
; to a constant vector that alternates groups of eight zero lanes (first
; operand) with groups of eight folded lanes from the second operand.
define <32 x i8> @fold_packuswb_256() {
; CHECK-LABEL: @fold_packuswb_256(
; CHECK-NEXT: ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
%1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
ret <32 x i8> %1
}
; 512-bit variant with a constant first operand and an undef second operand:
; the expected result alternates groups of four folded constants with groups
; of four undef lanes.
define <32 x i16> @fold_packssdw_512() {
; CHECK-LABEL: @fold_packssdw_512(
; CHECK-NEXT: ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
%1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
ret <32 x i16> %1
}
; 512-bit variant with two constant operands; the call is expected to fold to
; a constant vector that alternates groups of four lanes from the first
; operand and four lanes from the second operand.
define <32 x i16> @fold_packusdw_512() {
; CHECK-LABEL: @fold_packusdw_512(
; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
%1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
ret <32 x i16> %1
}
; 512-bit variant with an undef first operand and a zero second operand: the
; expected result alternates groups of eight undef lanes with groups of eight
; zero lanes.
define <64 x i8> @fold_packsswb_512() {
; CHECK-LABEL: @fold_packsswb_512(
; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
ret <64 x i8> %1
}
; 512-bit variant with a zero first operand and a constant second operand; the
; call is expected to fold to a constant vector that alternates groups of
; eight zero lanes with groups of eight folded lanes from the second operand.
define <64 x i8> @fold_packuswb_512() {
; CHECK-LABEL: @fold_packuswb_512(
; CHECK-NEXT: ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
%1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
ret <64 x i8> %1
}
;
; Demanded Elts
;
; The final shuffle reads only result lanes 1 and 7. Lane 7 corresponds to the
; last element of %2, which the second input shuffle leaves undef, and lane 1
; corresponds to element 1 of %1, which the first input shuffle takes from
; element 1 of %a0. The expected output therefore packs %a0 directly with an
; undef second operand and marks the upper four shuffle lanes undef.
define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
%2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
%3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
%4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
ret <8 x i16> %4
}
; Zeros are inserted into element 0 of %a0 and element 3 of %a1, but the final
; shuffle marks result lanes 0 and 7 as undef. The expected output drops both
; insertelements and calls the intrinsic on the original arguments.
define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = insertelement <4 x i32> %a0, i32 0, i32 0
%2 = insertelement <4 x i32> %a1, i32 0, i32 3
%3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
%4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
ret <8 x i16> %4
}
; Demanded-elements test: element 0 of both inputs is set to 0 and the final
; shuffle reads only result elements 0 and 8. The expected output is a
; zeroinitializer return with no call left.
define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_128(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = insertelement <8 x i16> %a0, i16 0, i32 0
%2 = insertelement <8 x i16> %a1, i16 0, i32 0
%3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %4
}
; Demanded-elements test: both inputs are poison vectors with only element 0
; set to 0, and the final shuffle reads only result elements 1 and 15. The
; expected output is an undef return (the arguments %a0/%a1 are unused).
define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
%1 = insertelement <8 x i16> poison, i16 0, i32 0
%2 = insertelement <8 x i16> poison, i16 0, i32 0
%3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
ret <16 x i8> %4
}
; Demanded-elements test: %1 swaps elements 0 and 1 of %a0, %2 is a partly
; undef shuffle of %a1, and the final shuffle reads result elements
; 2, 3, 4, 7, 8, 11, 12 and 15. The expected output passes %a0 directly with
; an undef second operand and keeps only mask entries 2, 3, 8 and 11 defined.
define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i16> [[TMP2]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
%3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
%4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
ret <16 x i16> %4
}
; Demanded-elements test: %1 is an identity shuffle of %a0, %2 reverses %a1,
; and the final shuffle reads only result elements 4-7 and 12-15. The
; expected output keeps the reversing shuffle of %a1, replaces the first pack
; operand with undef, and keeps the final shuffle unchanged.
define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
%4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %4
}
; Demanded-elements test: element 0 of %a0 and element 8 of %a1 are set to 0,
; and the final shuffle splats result element 0 over the first 16 positions
; and result element 24 over the last 16. The expected output is a
; zeroinitializer return with no call left.
define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_256(
; CHECK-NEXT: ret <32 x i8> zeroinitializer
;
%1 = insertelement <16 x i16> %a0, i16 0, i32 0
%2 = insertelement <16 x i16> %a1, i16 0, i32 8
%3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
ret <32 x i8> %4
}
; Demanded-elements test: %1 is a poison vector with element 1 set to 0, %2 is
; a poison vector with element 0 set to 0, and the final shuffle splats result
; element 0 (zeroinitializer mask). The expected output is an undef return
; (the arguments %a0/%a1 are unused).
define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
%1 = insertelement <16 x i16> poison, i16 0, i32 1
%2 = insertelement <16 x i16> poison, i16 0, i32 0
%3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %4
}
; Demanded-elements test: %1 swaps elements 0/1 and 8/9 of %a0, %2 is a partly
; undef shuffle of %a1, and the final shuffle has undef entries in half of its
; positions. The expected output passes %a0 directly with an undef second
; operand and keeps only mask entries 2, 3, 8, 11, 18, 19, 24 and 27 defined.
define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 undef, i32 undef, i32 27, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <32 x i16> [[TMP2]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
%3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
%4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
ret <32 x i16> %4
}
; Demanded-elements test: %1 is an identity shuffle of %a0, %2 reverses each
; 8-element half of %a1, and the final shuffle reads only result elements
; 4-7, 12-15, 20-23 and 28-31. The expected output keeps the reversing shuffle
; of %a1, replaces the first pack operand with undef, and keeps the final
; shuffle unchanged.
define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
%3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
%4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %4
}
; Demanded-elements test: zeros are inserted at elements 0 and 16 of %a0 and
; at elements 8 and 24 of %a1, and the final shuffle splats result elements
; 0, 24, 32 and 56 over 16 positions each. The expected output is a
; zeroinitializer return with no call left.
define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_512(
; CHECK-NEXT: ret <64 x i8> zeroinitializer
;
%1 = insertelement <32 x i16> %a0, i16 0, i32 0
%2 = insertelement <32 x i16> %a1, i16 0, i32 8
%3 = insertelement <32 x i16> %1, i16 0, i32 16
%4 = insertelement <32 x i16> %2, i16 0, i32 24
%5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
%6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
ret <64 x i8> %6
}
; Demanded-elements test: %1 is a poison vector with element 1 set to 0, %2 is
; a poison vector with element 0 set to 0, and the final shuffle splats result
; element 0 (zeroinitializer mask). The expected output is an undef return
; (the arguments %a0/%a1 are unused).
define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
%1 = insertelement <32 x i16> poison, i16 0, i32 1
%2 = insertelement <32 x i16> poison, i16 0, i32 0
%3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
ret <64 x i8> %4
}
;
; Truncation (without Saturation)
;
; Truncation test: %a0 is arithmetic-shifted right by 17 and %a1 is masked
; with 15 before being packed. The expected output is the same three
; instructions, i.e. the pack call is left in place.
define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
%2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
%3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
ret <8 x i16> %3
}
; Truncation test for the 128-bit unsigned dword->word pack: %a0 is
; logical-shifted right by 17 and %a1 is masked with 15 before being packed.
; The call targets @llvm.x86.sse41.packusdw so that the test exercises the
; intrinsic it is named after (it previously called the signed packssdw
; variant). The expected output keeps the call in place.
; NOTE(review): the expected call line below was edited by hand to match;
; regenerate with utils/update_test_checks.py to confirm.
define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
%2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
%3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
ret <8 x i16> %3
}
; Truncation test: %a0 is arithmetic-shifted right by 15 and %a1 is masked
; with 1 before being packed. The expected output is the same three
; instructions, i.e. the pack call is left in place.
define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
%1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
ret <16 x i8> %3
}
; Truncation test: %a0 is logical-shifted right by 15 and %a1 is masked with 1
; before being packed. The expected output is the same three instructions,
; i.e. the pack call is left in place.
define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
%1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
ret <16 x i8> %3
}
; Truncation test: %a0 is arithmetic-shifted right by 17 and %a1 by 23 before
; being packed. The expected output is the same three instructions, i.e. the
; pack call is left in place.
define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
%1 = ashr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%2 = ashr <8 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
%3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
ret <16 x i16> %3
}
; Truncation test for the 256-bit unsigned dword->word pack: %a0 is
; logical-shifted right by 17 and %a1 is masked with 15 before being packed.
; The call targets @llvm.x86.avx2.packusdw so that the test exercises the
; intrinsic it is named after (it previously called the signed packssdw
; variant). The expected output keeps the call in place.
; NOTE(review): the expected call line below was edited by hand to match;
; regenerate with utils/update_test_checks.py to confirm.
define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
%1 = lshr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%2 = and <8 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
%3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
ret <16 x i16> %3
}
; Truncation test: %a0 is arithmetic-shifted right by 15 and %a1 is masked
; with 1 before being packed. The expected output is the same three
; instructions, i.e. the pack call is left in place.
define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
%1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <16 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
ret <32 x i8> %3
}
; Truncation test: %a0 is logical-shifted right by 15 and %a1 is masked with 1
; before being packed. The expected output is the same three instructions,
; i.e. the pack call is left in place.
define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
%1 = lshr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <16 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
ret <32 x i8> %3
}
; Truncation test: %a0 is arithmetic-shifted right by 17 and %a1 by 23 before
; being packed. The expected output is the same three instructions, i.e. the
; pack call is left in place.
define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
%1 = ashr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%2 = ashr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
%3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
ret <32 x i16> %3
}
; Truncation test for the 512-bit unsigned dword->word pack: %a0 is
; logical-shifted right by 17 and %a1 is masked with 15 before being packed.
; The call targets @llvm.x86.avx512.packusdw.512 so that the test exercises
; the intrinsic it is named after (it previously called the signed packssdw
; variant). The expected output keeps the call in place.
; NOTE(review): the expected call line below was edited by hand to match;
; regenerate with utils/update_test_checks.py to confirm.
define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
%1 = lshr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
%2 = and <16 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
%3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
ret <32 x i16> %3
}
; Truncation test: %a0 is arithmetic-shifted right by 15 and %a1 is masked
; with 1 before being packed. The expected output is the same three
; instructions, i.e. the pack call is left in place.
define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
%1 = ashr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %1, <32 x i16> %2)
ret <64 x i8> %3
}
; Truncation test: %a0 is logical-shifted right by 15 and %a1 is masked with 1
; before being packed. The expected output is the same three instructions,
; i.e. the pack call is left in place.
define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
%1 = lshr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
ret <64 x i8> %3
}
;
; Signed Pack Comparison Results
;
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; signed dword->word pack. The expected output is the same instruction
; sequence, i.e. the pack call is left in place.
define <8 x i16> @cmp_packssdw_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]])
; CHECK-NEXT: ret <8 x i16> [[TMP5]]
;
%1 = icmp eq <4 x i32> %a0, %a1
%2 = icmp eq <4 x i32> %a2, %a3
%3 = sext <4 x i1> %1 to <4 x i32>
%4 = sext <4 x i1> %2 to <4 x i32>
%5 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %3, <4 x i32> %4)
ret <8 x i16> %5
}
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; signed word->byte pack. The expected output is the same instruction
; sequence, i.e. the pack call is left in place.
define <16 x i8> @cmp_packsswb_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]])
; CHECK-NEXT: ret <16 x i8> [[TMP5]]
;
%1 = icmp eq <8 x i16> %a0, %a1
%2 = icmp eq <8 x i16> %a2, %a3
%3 = sext <8 x i1> %1 to <8 x i16>
%4 = sext <8 x i1> %2 to <8 x i16>
%5 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %3, <8 x i16> %4)
ret <16 x i8> %5
}
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; 256-bit signed dword->word pack. The expected output is the same
; instruction sequence, i.e. the pack call is left in place.
define <16 x i16> @cmp_packssdw_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP3]], <8 x i32> [[TMP4]])
; CHECK-NEXT: ret <16 x i16> [[TMP5]]
;
%1 = icmp eq <8 x i32> %a0, %a1
%2 = icmp eq <8 x i32> %a2, %a3
%3 = sext <8 x i1> %1 to <8 x i32>
%4 = sext <8 x i1> %2 to <8 x i32>
%5 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %3, <8 x i32> %4)
ret <16 x i16> %5
}
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; 256-bit signed word->byte pack. The expected output is the same
; instruction sequence, i.e. the pack call is left in place.
define <32 x i8> @cmp_packsswb_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP3]], <16 x i16> [[TMP4]])
; CHECK-NEXT: ret <32 x i8> [[TMP5]]
;
%1 = icmp eq <16 x i16> %a0, %a1
%2 = icmp eq <16 x i16> %a2, %a3
%3 = sext <16 x i1> %1 to <16 x i16>
%4 = sext <16 x i1> %2 to <16 x i16>
%5 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %3, <16 x i16> %4)
ret <32 x i8> %5
}
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; 512-bit signed dword->word pack. The expected output is the same
; instruction sequence, i.e. the pack call is left in place.
define <32 x i16> @cmp_packssdw_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, <16 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP3]], <16 x i32> [[TMP4]])
; CHECK-NEXT: ret <32 x i16> [[TMP5]]
;
%1 = icmp eq <16 x i32> %a0, %a1
%2 = icmp eq <16 x i32> %a2, %a3
%3 = sext <16 x i1> %1 to <16 x i32>
%4 = sext <16 x i1> %2 to <16 x i32>
%5 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %3, <16 x i32> %4)
ret <32 x i16> %5
}
; Packs two sign-extended `icmp eq` results (%a0 vs %a1, %a2 vs %a3) with the
; 512-bit signed word->byte pack. The expected output is the same
; instruction sequence, i.e. the pack call is left in place.
define <64 x i8> @cmp_packsswb_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2, <32 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <32 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <32 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <32 x i1> [[TMP1]] to <32 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <32 x i1> [[TMP2]] to <32 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP3]], <32 x i16> [[TMP4]])
; CHECK-NEXT: ret <64 x i8> [[TMP5]]
;
%1 = icmp eq <32 x i16> %a0, %a1
%2 = icmp eq <32 x i16> %a2, %a3
%3 = sext <32 x i1> %1 to <32 x i16>
%4 = sext <32 x i1> %2 to <32 x i16>
%5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
ret <64 x i8> %5
}
; Declarations of the x86 pack intrinsics called by the tests in this file.
; 128-bit results (SSE2 / SSE4.1 names).
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
; 256-bit results (AVX2 names).
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
; 512-bit results (AVX-512 names).
declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone

View File

@ -0,0 +1,694 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Returns lane 0 of rcp.ss applied to <%a, 1.0, 2.0, 3.0>. The expected output
; keeps the intrinsic call but only the lane-0 insert; the inserts into lanes
; 1-3 are gone. Note the expected base vector is undef although the input
; starts from poison.
define float @test_rcp_ss_0(float %a) {
; CHECK-LABEL: @test_rcp_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
ret float %6
}
; Returns lane 1 of rcp.ss applied to <%a, 1.0, 2.0, 3.0>. The expected output
; is just the constant 1.0, which matches the value inserted into lane 1; the
; intrinsic call and all vector operations are gone.
define float @test_rcp_ss_1(float %a) {
; CHECK-LABEL: @test_rcp_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 1
ret float %6
}
; Returns lane 0 of sqrt.ss applied to <%a, 1.0, 2.0, 3.0>. The expected output
; is a scalar call to @llvm.sqrt.f32 on %a with no vector operations left.
define float @test_sqrt_ss_0(float %a) {
; CHECK-LABEL: @test_sqrt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[A:%.*]])
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
ret float %6
}
; Returns lane 2 of sqrt.ss applied to <%a, 1.0, 2.0, 3.0>. The expected output
; is just the constant 2.0, which matches the value inserted into lane 2.
define float @test_sqrt_ss_2(float %a) {
; CHECK-LABEL: @test_sqrt_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 2
ret float %6
}
; Returns lane 0 of rsqrt.ss applied to <%a, 1.0, 2.0, 3.0>. The expected
; output keeps the intrinsic call but only the lane-0 insert; the inserts into
; lanes 1-3 are gone. The expected base vector is undef although the input
; starts from poison.
define float @test_rsqrt_ss_0(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
ret float %6
}
; Returns lane 3 of rsqrt.ss applied to <%a, 1.0, 2.0, 3.0>. The expected
; output is just the constant 3.0, which matches the value inserted into lane 3.
define float @test_rsqrt_ss_3(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 3
ret float %6
}
; Returns lane 0 of add.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). The
; expected output is a single scalar fadd of %a and %b.
define float @test_add_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Returns lane 1 of add.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 1.0, which matches lane 1 of the first operand.
define float @test_add_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 1
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both add.ss
; operands and returns lane 0. The expected output is a scalar fadd of %a with
; itself.
define float @test_add_ss_2(float %a) {
; CHECK-LABEL: @test_add_ss_2(
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Returns lane 0 of sub.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). The
; expected output is a single scalar fsub of %a and %b.
define float @test_sub_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Returns lane 2 of sub.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 2.0, which matches lane 2 of the first operand.
define float @test_sub_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 2
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both sub.ss
; operands and returns lane 0. The expected output is a scalar fsub of %a from
; itself; it is not folded further to a constant.
define float @test_sub_ss_3(float %a) {
; CHECK-LABEL: @test_sub_ss_3(
; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Returns lane 0 of mul.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). The
; expected output is a single scalar fmul of %a and %b.
define float @test_mul_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Returns lane 3 of mul.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 3.0, which matches lane 3 of the first operand.
define float @test_mul_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 3
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both mul.ss
; operands and returns lane 0. The expected output is a scalar fmul of %a with
; itself.
define float @test_mul_ss_4(float %a) {
; CHECK-LABEL: @test_mul_ss_4(
; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Returns lane 0 of div.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). The
; expected output is a single scalar fdiv of %a by %b.
define float @test_div_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Returns lane 1 of div.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 1.0, which matches lane 1 of the first operand.
define float @test_div_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 1
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both div.ss
; operands and returns lane 0. The expected output is a scalar fdiv of %a by
; itself; it is not folded further to a constant.
define float @test_div_ss_2(float %a) {
; CHECK-LABEL: @test_div_ss_2(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Overwrites lanes 1-3 of the second operand with constants before calling
; min.ss, and returns the whole result vector. The expected output passes %b
; straight to the intrinsic, with the three inserts removed.
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
ret <4 x float> %4
}
; Returns lane 0 of min.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). Unlike the
; add/sub/mul/div tests, the expected output keeps the intrinsic call; only the
; inserts into lanes 1-3 of both operands are removed. The expected base
; vectors are undef although the input starts from poison.
define float @test_min_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[TMP4]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
%10 = extractelement <4 x float> %9, i32 0
ret float %10
}
; Returns lane 2 of min.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 2.0, which matches lane 2 of the first operand.
define float @test_min_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 2
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both min.ss
; operands and returns lane 0. The expected output keeps the intrinsic call;
; the only visible change is that lane 0 of the constant base vector is shown
; as undef instead of zero.
define float @test_min_ss_3(float %a) {
; CHECK-LABEL: @test_min_ss_3(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Overwrites lanes 1-3 of the second operand with constants before calling
; max.ss, and returns the whole result vector. The expected output passes %b
; straight to the intrinsic, with the three inserts removed.
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
ret <4 x float> %4
}
; Returns lane 0 of max.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>). The
; expected output keeps the intrinsic call; only the inserts into lanes 1-3 of
; both operands are removed. The expected base vectors are undef although the
; input starts from poison.
define float @test_max_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[TMP4]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
%10 = extractelement <4 x float> %9, i32 0
ret float %10
}
; Returns lane 3 of max.ss where the first operand is <%a, 1.0, 2.0, 3.0> and
; the second operand only has %b inserted into lane 0. The expected output is
; just the constant 3.0, which matches lane 3 of the first operand.
define float @test_max_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
%7 = extractelement <4 x float> %6, i32 3
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both max.ss
; operands and returns lane 0. The expected output keeps the intrinsic call;
; the only visible change is that lane 0 of the constant base vector is shown
; as undef instead of zero.
define float @test_max_ss_4(float %a) {
; CHECK-LABEL: @test_max_ss_4(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Overwrites lanes 1-3 of the second operand with constants before calling
; cmp.ss with immediate 0, and returns the whole result vector. The expected
; output passes %b straight to the intrinsic, with the three inserts removed.
define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_cmp_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
ret <4 x float> %4
}
; Returns lane 0 of cmp.ss(<%a, 1.0, 2.0, 3.0>, <%b, 4.0, 5.0, 6.0>, 0). The
; expected output keeps the intrinsic call; only the inserts into lanes 1-3 of
; both operands are removed. The expected base vectors are undef although the
; input starts from poison.
define float @test_cmp_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Returns lane 1 of cmp.ss (immediate 0) where the first operand is
; <%a, 1.0, 2.0, 3.0> and the second operand only has %b inserted into lane 0.
; The expected output is just the constant 1.0, which matches lane 1 of the
; first operand.
define float @test_cmp_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
%7 = extractelement <4 x float> %6, i32 1
ret float %7
}
; Uses the same vector (zeroinitializer with %a in lane 0) for both cmp.ss
; operands with immediate 3 and returns lane 0. The expected output keeps the
; intrinsic call; the only visible change is that lane 0 of the constant base
; vector is shown as undef instead of zero.
define float @test_cmp_ss_2(float %a) {
; CHECK-LABEL: @test_cmp_ss_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comieq.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comieq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comige.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comige_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comigt.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comigt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comile.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comile_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comilt.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comilt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comineq.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_comineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comineq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomieq.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomieq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomige.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomige_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomigt.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomigt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomile.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomile_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomilt.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomilt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomineq.ss and returns
; its i32 result. The expected output keeps the call but builds each operand
; with only the lane-0 insert (onto an undef base, although the input starts
; from poison); the inserts into lanes 1-3 are gone.
define i32 @test_ucomineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomineq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
ret i32 %9
}
; Declarations of the SSE scalar-float intrinsics exercised by the tests above:
; unary (rcp/sqrt/rsqrt), binary arithmetic (add/sub/mul/div/min/max), cmp.ss
; with its i8 immediate, and the comi*/ucomi* comparisons that return i32.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)

View File

@ -0,0 +1,541 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; Module data layout: little-endian (e), ELF name mangling (m:e), i64 aligned
; to 64 bits, f80 aligned to 128 bits, native integer widths 8/16/32/64, and
; 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Builds <%a, 1.0> on a poison base, calls llvm.x86.sse2.sqrt.sd and returns
; lane 0. Expected output: the vector code collapses to a scalar
; @llvm.sqrt.f64 call on %a.
define double @test_sqrt_sd_0(double %a) {
; CHECK-LABEL: @test_sqrt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 0
ret double %4
}
; Same input as test_sqrt_sd_0 but returns lane 1 of the sqrt.sd result.
; Expected output: the constant 1.0 that was inserted into lane 1 of the
; operand, with the intrinsic call removed.
define double @test_sqrt_sd_1(double %a) {
; CHECK-LABEL: @test_sqrt_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 1
ret double %4
}
; Operands are <%a, 1.0> and <%b, 2.0>, each built on a poison base; lane 0 of
; the llvm.x86.sse2.add.sd result is returned.
; Expected output: a scalar fadd of %a and %b.
define double @test_add_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_add_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_add_sd_0 but returns lane 1 of the add.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand (the second operand's lane 1 is 2.0).
define double @test_add_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_add_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of add.sd and returns lane 0.
; Expected output: a scalar fadd of %a with itself.
define double @test_add_sd_2(double %a) {
; CHECK-LABEL: @test_add_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Operands are <%a, 1.0> and <%b, 2.0>, each built on a poison base; lane 0 of
; the llvm.x86.sse2.sub.sd result is returned.
; Expected output: a scalar fsub of %a and %b.
define double @test_sub_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_sub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_sub_sd_0 but returns lane 1 of the sub.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand.
define double @test_sub_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_sub_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of sub.sd and returns lane 0.
; Expected output: a scalar fsub of %a with itself (not folded to a constant).
define double @test_sub_sd_2(double %a) {
; CHECK-LABEL: @test_sub_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Operands are <%a, 1.0> and <%b, 2.0>, each built on a poison base; lane 0 of
; the llvm.x86.sse2.mul.sd result is returned.
; Expected output: a scalar fmul of %a and %b.
define double @test_mul_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_mul_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_mul_sd_0 but returns lane 1 of the mul.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand.
define double @test_mul_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_mul_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of mul.sd and returns lane 0.
; Expected output: a scalar fmul of %a with itself.
define double @test_mul_sd_2(double %a) {
; CHECK-LABEL: @test_mul_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Operands are <%a, 1.0> and <%b, 2.0>, each built on a poison base; lane 0 of
; the llvm.x86.sse2.div.sd result is returned.
; Expected output: a scalar fdiv of %a by %b.
define double @test_div_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_div_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_div_sd_0 but returns lane 1 of the div.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand.
define double @test_div_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_div_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of div.sd and returns lane 0.
; Expected output: a scalar fdiv of %a by itself (not folded to a constant).
define double @test_div_sd_2(double %a) {
; CHECK-LABEL: @test_div_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Overwrites lane 1 of %b with 2.0 before calling llvm.x86.sse2.min.sd and
; returns the whole result vector.
; Expected output: the insertelement is dropped and %b is passed unchanged.
define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_min_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
ret <2 x double> %2
}
; Operands are <%a, 1.0> and <%b, 2.0> on poison bases; lane 0 of the min.sd
; result is returned.
; Expected output: the intrinsic call is kept, but the lane-1 inserts are
; removed so each operand is a single lane-0 insertelement. Note that the
; expected output prints the base vector as undef although the input uses
; poison.
define double @test_min_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_min_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_min_sd_0 but returns lane 1 of the min.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand, with the intrinsic call removed.
define double @test_min_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_min_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of min.sd and returns lane 0.
; Expected output: the call is kept; only the base constant changes, with its
; overwritten lane 0 printed as undef (<undef, 0.0>).
define double @test_min_sd_2(double %a) {
; CHECK-LABEL: @test_min_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Overwrites lane 1 of %b with 2.0 before calling llvm.x86.sse2.max.sd and
; returns the whole result vector.
; Expected output: the insertelement is dropped and %b is passed unchanged.
define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_max_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
ret <2 x double> %2
}
; Operands are <%a, 1.0> and <%b, 2.0> on poison bases; lane 0 of the max.sd
; result is returned.
; Expected output: the intrinsic call is kept, but the lane-1 inserts are
; removed so each operand is a single lane-0 insertelement (base printed as
; undef).
define double @test_max_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_max_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_max_sd_0 but returns lane 1 of the max.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand, with the intrinsic call removed.
define double @test_max_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_max_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of max.sd and returns lane 0.
; Expected output: the call is kept; only the base constant changes, with its
; overwritten lane 0 printed as undef (<undef, 0.0>).
define double @test_max_sd_2(double %a) {
; CHECK-LABEL: @test_max_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Overwrites lane 1 of %b with 2.0 before calling llvm.x86.sse2.cmp.sd with
; i8 operand 0 and returns the whole result vector.
; Expected output: the insertelement is dropped and %b is passed unchanged.
define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_cmp_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i8 0)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
ret <2 x double> %2
}
; Operands are <%a, 1.0> and <%b, 2.0> on poison bases; lane 0 of the cmp.sd
; result (i8 operand 0) is returned.
; Expected output: the intrinsic call is kept, but the lane-1 inserts are
; removed so each operand is a single lane-0 insertelement (base printed as
; undef).
define double @test_cmp_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_cmp_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_cmp_sd_0 but returns lane 1 of the cmp.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand, with the intrinsic call removed.
define double @test_cmp_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_cmp_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of cmp.sd and returns lane 0. Unlike the other cmp.sd tests here,
; the i8 operand is 3 rather than 0.
; Expected output: the call is kept; only the base constant changes, with its
; overwritten lane 0 printed as undef (<undef, 0.0>).
define double @test_cmp_sd_2(double %a) {
; CHECK-LABEL: @test_cmp_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]], i8 3)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %1, i8 3)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Returns the i32 result of llvm.x86.sse2.comieq.sd on <%a, 1.0> and <%b, 2.0>
; (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.comige.sd on <%a, 1.0> and <%b, 2.0>
; (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.comigt.sd on <%a, 1.0> and <%b, 2.0>
; (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.comile.sd on <%a, 1.0> and <%b, 2.0>
; (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.comilt.sd on <%a, 1.0> and <%b, 2.0>
; (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.comineq.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_comineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomieq.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomige.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomigt.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomile.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomilt.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Returns the i32 result of llvm.x86.sse2.ucomineq.sd on <%a, 1.0> and
; <%b, 2.0> (poison bases).
; Expected output: the call is kept, but the lane-1 inserts are removed from
; both operands (base printed as undef).
define i32 @test_ucomineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
ret i32 %5
}
; Declarations of the x86 SSE2 scalar double-precision (.sd) intrinsics called
; by the tests above. All take <2 x double> operands; cmp.sd takes an
; additional i8 operand, and the comi*/ucomi* comparisons return i32. Only
; sqrt.sd is declared with explicit `nounwind readnone` attributes here.
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)

View File

@ -0,0 +1,124 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; Module data layout: little-endian (e), ELF name mangling (m:e), i64 aligned
; to 64 bits, f80 aligned to 128 bits, native integer widths 8/16/32/64, and
; 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Overwrites lane 0 of %a with 1.0 and lane 1 of %b with 2.0 before calling
; llvm.x86.sse41.round.sd (i32 operand 10); the whole result is returned.
; Expected output: both insertelements are dropped and %a / %b are passed to
; the intrinsic unchanged.
define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_round_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 10)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
ret <2 x double> %3
}
; Operands are <%a, 1.0> and <%b, 2.0> on poison bases; lane 0 of the round.sd
; result is returned.
; Expected output: the call is kept, the first operand is replaced entirely by
; undef, and the second operand keeps only the lane-0 insert of %b (so %a no
; longer appears in the output).
define double @test_round_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_round_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
%6 = extractelement <2 x double> %5, i32 0
ret double %6
}
; Same operands as test_round_sd_0 but returns lane 1 of the round.sd result.
; Expected output: the constant 1.0, which is the lane-1 value of the first
; operand, with the intrinsic call removed.
define double @test_round_sd_1(double %a, double %b) {
; CHECK-LABEL: @test_round_sd_1(
; CHECK-NEXT: ret double 1.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = insertelement <2 x double> poison, double %b, i32 0
%4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
%5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
%6 = extractelement <2 x double> %5, i32 1
ret double %6
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of round.sd and returns lane 0.
; Expected output: the call is kept, the first operand becomes undef, and the
; second operand is %a inserted into lane 0 of an undef vector.
define double @test_round_sd_2(double %a) {
; CHECK-LABEL: @test_round_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %1, i32 10)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}
; Overwrites lanes 1-3 of both %a and %b with 1.0, 2.0, 3.0 before calling
; llvm.x86.sse41.round.ss (i32 operand 10); the whole result is returned.
; Expected output: the first operand becomes the constant
; <undef, 1.0, 2.0, 3.0> (so %a is no longer used) and %b is passed unchanged.
define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_round_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
%6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
%7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
ret <4 x float> %7
}
; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> on poison bases;
; lane 0 of the round.ss result is returned.
; Expected output: the call is kept, the first operand is replaced entirely by
; undef, and the second operand keeps only the lane-0 insert of %b.
define float @test_round_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_round_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
%r = extractelement <4 x float> %9, i32 0
ret float %r
}
; Same operands as test_round_ss_0 but returns lane 2 of the round.ss result.
; Expected output: the constant 2.0, which is the lane-2 value of the first
; operand (the second operand's lane 2 is 5.0), with the call removed.
define float @test_round_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_round_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = insertelement <4 x float> poison, float %b, i32 0
%6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
%7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
%8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
%9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
%r = extractelement <4 x float> %9, i32 2
ret float %r
}
; Passes the same vector (%a inserted into lane 0 of zeroinitializer) as both
; operands of round.ss and returns lane 0.
; Expected output: the call is kept, the first operand becomes undef, and the
; second operand is %a inserted into lane 0 of an undef vector.
define float @test_round_ss_3(float %a) {
; CHECK-LABEL: @test_round_ss_3(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %1, <4 x float> %1, i32 10)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}
; Declarations of the SSE4.1 scalar rounding intrinsics called by the tests
; above. Each takes two vectors plus an i32 operand (the tests always pass 10)
; and is declared `nounwind readnone`.
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

View File

@ -0,0 +1,110 @@
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Chains sub.ss, mul.ss, min.ss, max.ss and cvttss2si on a vector whose lane 0
; is %f (poison base, lanes 1-3 set to 0.0) and truncates the i32 result to
; i16. The expected output performs the subtract and multiply as scalar
; `fadd -1.0` / `fmul 0.5`, re-inserts the scalar into lane 0 of an undef
; vector, and shows lanes 1-3 of the min.ss / max.ss constant operands as
; undef.
define i16 @test1(float %f) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00
; CHECK-NEXT: [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
; CHECK-NEXT: ret i16 [[TMP69]]
;
%tmp = insertelement <4 x float> poison, float %f, i32 0
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
%tmp69 = trunc i32 %tmp.upgrd.1 to i16
ret i16 %tmp69
}
; Feeds %f and %d, each placed in lane 0 of a poison-based vector whose
; remaining lanes are set to 0.0, to eight scalar conversion intrinsics
; (cvtss2si, cvtss2si64, cvttss2si, cvttss2si64 and the sse2 cvtsd2si /
; cvttsd2si counterparts) and sums the results. In the expected output each
; call is preceded only by the lane-0 insertelement (shown with an undef
; base); the inserts of 0.0 into the upper lanes are gone. The operands of the
; final add also appear in the opposite order from the input.
define i64 @test3(float %f, double %d) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
; CHECK-NEXT: [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
; CHECK-NEXT: [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
%v00 = insertelement <4 x float> poison, float %f, i32 0
%v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
%v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
%v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
%tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
%v10 = insertelement <4 x float> poison, float %f, i32 0
%v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
%v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
%v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
%tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
%v20 = insertelement <4 x float> poison, float %f, i32 0
%v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
%v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
%v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
%tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
%v30 = insertelement <4 x float> poison, float %f, i32 0
%v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
%v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
%v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
%tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
%v40 = insertelement <2 x double> poison, double %d, i32 0
%v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
%tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
%v50 = insertelement <2 x double> poison, double %d, i32 0
%v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
%tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
%v60 = insertelement <2 x double> poison, double %d, i32 0
%v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
%tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
%v70 = insertelement <2 x double> poison, double %d, i32 0
%v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
%tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
%tmp8 = add i32 %tmp0, %tmp2
%tmp9 = add i32 %tmp4, %tmp6
%tmp10 = add i32 %tmp8, %tmp9
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = add i64 %tmp1, %tmp3
%tmp13 = add i64 %tmp5, %tmp7
%tmp14 = add i64 %tmp12, %tmp13
%tmp15 = add i64 %tmp11, %tmp14
ret i64 %tmp15
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,305 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
; Overwrites lane 1 of %a with 1.0 before calling llvm.x86.xop.vfrcz.sd.
; The expected output calls the intrinsic directly on %a, i.e. the lane-1
; insertelement is dropped.
define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
; CHECK-LABEL: @test_vfrcz_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[A:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
ret <2 x double> %2
}
; Builds <%a, 1.0> on a poison base, calls llvm.x86.xop.vfrcz.sd and returns
; lane 0 of the result. The expected output keeps the lane-0 insert (shown
; with an undef base), the call and the extract; the lane-1 insert is dropped.
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 0
ret double %4
}
; Builds <%a, 1.0> on a poison base, calls llvm.x86.xop.vfrcz.sd and returns
; lane 1 of the result. The expected output is the constant 0.0, with no call
; remaining.
define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT: ret double 0.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 1
ret double %4
}
; Overwrites lanes 1-3 of %a with constants before calling
; llvm.x86.xop.vfrcz.ss. The expected output calls the intrinsic directly on
; %a, i.e. all three insertelements are dropped.
define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
; CHECK-LABEL: @test_vfrcz_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[A:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
ret <4 x float> %4
}
; Builds <%a, 1.0, 2.0, 3.0> on a poison base, calls llvm.x86.xop.vfrcz.ss and
; returns lane 0 of the result. The expected output keeps the lane-0 insert
; (shown with an undef base), the call and the extract; the inserts into
; lanes 1-3 are dropped.
define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
ret float %6
}
; Builds <%a, 1.0, 2.0, 3.0> on a poison base, calls llvm.x86.xop.vfrcz.ss and
; returns lane 3 of the result. The expected output is the constant 0.0, with
; no call remaining.
define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT: ret float 0.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 3
ret float %6
}
; llvm.x86.xop.vpcomltq on <2 x i64>: the expected output replaces the call
; with `icmp slt` followed by a sext of the <2 x i1> result to <2 x i64>.
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
; llvm.x86.xop.vpcomltuq on <2 x i64>: the expected output replaces the call
; with `icmp ult` followed by a sext of the <2 x i1> result to <2 x i64>.
define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
; llvm.x86.xop.vpcomleq on <2 x i64>: the expected output replaces the call
; with `icmp sle` followed by a sext of the <2 x i1> result to <2 x i64>.
define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
; llvm.x86.xop.vpcomleuq on <2 x i64>: the expected output replaces the call
; with `icmp ule` followed by a sext of the <2 x i1> result to <2 x i64>.
define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
; llvm.x86.xop.vpcomgtd on <4 x i32>: the expected output replaces the call
; with `icmp sgt` followed by a sext of the <4 x i1> result to <4 x i32>.
define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
; llvm.x86.xop.vpcomgtud on <4 x i32>: the expected output replaces the call
; with `icmp ugt` followed by a sext of the <4 x i1> result to <4 x i32>.
define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
; llvm.x86.xop.vpcomged on <4 x i32>: the expected output replaces the call
; with `icmp sge` followed by a sext of the <4 x i1> result to <4 x i32>.
define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
; llvm.x86.xop.vpcomgeud on <4 x i32>: the expected output replaces the call
; with `icmp uge` followed by a sext of the <4 x i1> result to <4 x i32>.
define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
; llvm.x86.xop.vpcomeqw on <8 x i16>: the expected output replaces the call
; with `icmp eq` followed by a sext of the <8 x i1> result to <8 x i16>.
define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
; llvm.x86.xop.vpcomequw on <8 x i16>: the expected output is the same
; `icmp eq` + sext sequence as for the signed variant vpcomeqw.
define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
; llvm.x86.xop.vpcomnew on <8 x i16>: the expected output replaces the call
; with `icmp ne` followed by a sext of the <8 x i1> result to <8 x i16>.
define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
; llvm.x86.xop.vpcomneuw on <8 x i16>: the expected output is the same
; `icmp ne` + sext sequence as for the signed variant vpcomnew.
define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
; llvm.x86.xop.vpcomtrueb on <16 x i8>: the expected output is a constant
; vector with every lane set to -1, independent of %a and %b.
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
; llvm.x86.xop.vpcomtrueub on <16 x i8>: the expected output is a constant
; vector with every lane set to -1, independent of %a and %b.
define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
; llvm.x86.xop.vpcomfalseb on <16 x i8>: the expected output is
; zeroinitializer, independent of %a and %b.
define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
; llvm.x86.xop.vpcomfalseub on <16 x i8>: the expected output is
; zeroinitializer, independent of %a and %b.
define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone

View File

@ -0,0 +1,573 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
; Bitcasts between vectors and scalars are valid.
; PR4487
; Bitcasts the same i64 to <2 x i32> twice, xors the two vectors and returns
; lane 0. The expected output is the constant 0.
define i32 @test1(i64 %a) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret i32 0
;
%t1 = bitcast i64 %a to <2 x i32>
%t2 = bitcast i64 %a to <2 x i32>
%t3 = xor <2 x i32> %t1, %t2
%t4 = extractelement <2 x i32> %t3, i32 0
ret i32 %t4
}
; Perform the bitwise logic in the source type of the operands to eliminate bitcasts.
; Both xor operands are bitcasts from <1 x i64> to <2 x i32>. The expected
; output performs the xor on the <1 x i64> sources and bitcasts the result
; once, leaving one bitcast instead of two.
define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: @xor_two_vector_bitcasts(
; CHECK-NEXT: [[T31:%.*]] = xor <1 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[T3:%.*]] = bitcast <1 x i64> [[T31]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[T3]]
;
%t1 = bitcast <1 x i64> %a to <2 x i32>
%t2 = bitcast <1 x i64> %b to <2 x i32>
%t3 = xor <2 x i32> %t1, %t2
ret <2 x i32> %t3
}
; No change. Bitcasts are canonicalized above bitwise logic.
; xor of a constant vector with a <1 x i64>-to-<2 x i32> bitcast of %a. The
; expected output keeps the bitcast followed by the xor; the only visible
; difference from the input is that the constant is the second xor operand.
define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
; CHECK-LABEL: @xor_bitcast_vec_to_vec(
; CHECK-NEXT: [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
; CHECK-NEXT: [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast <1 x i64> %a to <2 x i32>
%t2 = xor <2 x i32> <i32 1, i32 2>, %t1
ret <2 x i32> %t2
}
; No change. Bitcasts are canonicalized above bitwise logic.
; `and` of a <2 x i32>-to-i64 bitcast with the constant 3. The expected output
; has the same two instructions as the input.
define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
; CHECK-LABEL: @and_bitcast_vec_to_int(
; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 3
; CHECK-NEXT: ret i64 [[T2]]
;
%t1 = bitcast <2 x i32> %a to i64
%t2 = and i64 %t1, 3
ret i64 %t2
}
; No change. Bitcasts are canonicalized above bitwise logic.
; `or` of an i64-to-<2 x i32> bitcast with a constant vector. The expected
; output has the same two instructions as the input.
define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
; CHECK-LABEL: @or_bitcast_int_to_vec(
; CHECK-NEXT: [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
; CHECK-NEXT: [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast i64 %a to <2 x i32>
%t2 = or <2 x i32> %t1, <i32 1, i32 2>
ret <2 x i32> %t2
}
; PR26702 - https://bugs.llvm.org/show_bug.cgi?id=26702
; Bitcast is canonicalized above logic, so we can see the not-not pattern.
; ashr by 31, xor with all-ones in <4 x i32>, bitcast to <2 x i64>, then xor
; with all-ones again in <2 x i64>. In the expected output both xors are gone,
; leaving only the ashr and a single bitcast.
define <2 x i64> @is_negative(<4 x i32> %x) {
; CHECK-LABEL: @is_negative(
; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
ret <2 x i64> %notnot
}
; This variation has an extra bitcast at the end. This means that the 2nd xor
; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalization.
; Same not / bitcast / not sequence as a round trip: the <2 x i64> result is
; bitcast back to <4 x i32> before returning. The expected output is just the
; ashr; both xors and both bitcasts are gone.
define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
; CHECK-LABEL: @is_negative_bonus_bitcast(
; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> [[LOBIT]]
;
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
%bc2 = bitcast <2 x i64> %notnot to <4 x i32>
ret <4 x i32> %bc2
}
; Bitcasts are canonicalized above bitwise logic.
; `and` on <4 x i4> with <0, 8, 0, 8> followed by a bitcast to <2 x i8>. The
; expected output bitcasts %x first and performs the `and` in <2 x i8> with
; the constant <-128, -128>.
define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
; CHECK-NEXT: ret <2 x i8> [[B]]
;
%a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
%b = bitcast <4 x i4> %a to <2 x i8>
ret <2 x i8> %b
}
; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925
; Both operands are bitcast to <2 x i64>, and-ed, and the result is bitcast to
; <4 x i32> (the type of %a). The expected output bitcasts only %b to
; <4 x i32> and performs the `and` in that type, leaving one bitcast.
define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: @bitcasts_and_bitcast(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <4 x i32>
; CHECK-NEXT: [[BC3:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: ret <4 x i32> [[BC3]]
;
%bc1 = bitcast <4 x i32> %a to <2 x i64>
%bc2 = bitcast <8 x i16> %b to <2 x i64>
%and = and <2 x i64> %bc2, %bc1
%bc3 = bitcast <2 x i64> %and to <4 x i32>
ret <4 x i32> %bc3
}
; The destination must have an integer element type.
; FIXME: We can still eliminate one bitcast in this test by doing the logic op
; in the type of the input that has an integer element type.
; Same shape as an and-of-two-bitcasts, but %a and the result are
; <4 x float>. The expected output retains all three bitcasts and the `and`
; exactly as written in the input.
define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) {
; CHECK-LABEL: @bitcasts_and_bitcast_to_fp(
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64>
; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]]
; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float>
; CHECK-NEXT: ret <4 x float> [[BC3]]
;
%bc1 = bitcast <4 x float> %a to <2 x i64>
%bc2 = bitcast <8 x i16> %b to <2 x i64>
%and = and <2 x i64> %bc2, %bc1
%bc3 = bitcast <2 x i64> %and to <4 x float>
ret <4 x float> %bc3
}
; FIXME: Transform limited from changing vector op to integer op to avoid codegen problems.
; i128 %a is bitcast to <2 x i64>, or-ed with %b, and the result is bitcast
; back to i128. The expected output keeps both bitcasts; only the order of the
; `or` operands differs from the input.
define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_or_bitcast(
; CHECK-NEXT: [[BC1:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[BC1]], [[B:%.*]]
; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[OR]] to i128
; CHECK-NEXT: ret i128 [[BC2]]
;
%bc1 = bitcast i128 %a to <2 x i64>
%or = or <2 x i64> %b, %bc1
%bc2 = bitcast <2 x i64> %or to i128
ret i128 %bc2
}
; FIXME: Transform limited from changing integer op to vector op to avoid codegen problems.
; <4 x i32> %a is bitcast to i128, xor-ed with %b, and the result is bitcast
; back to <4 x i32>. The expected output has the same three instructions as
; the input.
define <4 x i32> @bitcast_xor_bitcast(<4 x i32> %a, i128 %b) {
; CHECK-LABEL: @bitcast_xor_bitcast(
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
; CHECK-NEXT: [[XOR:%.*]] = xor i128 [[BC1]], [[B:%.*]]
; CHECK-NEXT: [[BC2:%.*]] = bitcast i128 [[XOR]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[BC2]]
;
%bc1 = bitcast <4 x i32> %a to i128
%xor = xor i128 %bc1, %b
%bc2 = bitcast i128 %xor to <4 x i32>
ret <4 x i32> %bc2
}
; https://llvm.org/bugs/show_bug.cgi?id=6137#c6
; Vector-condition select between two values bitcast to <4 x i32>, with the
; result bitcast to <4 x float>. The expected output selects directly in
; <4 x float>: %x is used as-is and only %y needs a bitcast.
define <4 x float> @bitcast_vector_select(<4 x float> %x, <2 x i64> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @bitcast_vector_select(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
; CHECK-NEXT: [[T7:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t4 = bitcast <4 x float> %x to <4 x i32>
%t5 = bitcast <2 x i64> %y to <4 x i32>
%t6 = select <4 x i1> %cmp, <4 x i32> %t4, <4 x i32> %t5
%t7 = bitcast <4 x i32> %t6 to <4 x float>
ret <4 x float> %t7
}
; Scalar select between a float bitcast to i32 and an i32, with the result
; bitcast back to float. The expected output selects directly in float: %x is
; used as-is and %y is bitcast to float.
define float @bitcast_scalar_select_of_scalars(float %x, i32 %y, i1 %cmp) {
; CHECK-LABEL: @bitcast_scalar_select_of_scalars(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[Y:%.*]] to float
; CHECK-NEXT: [[T7:%.*]] = select i1 [[CMP:%.*]], float [[X:%.*]], float [[TMP1]]
; CHECK-NEXT: ret float [[T7]]
;
%t4 = bitcast float %x to i32
%t6 = select i1 %cmp, i32 %t4, i32 %y
%t7 = bitcast i32 %t6 to float
ret float %t7
}
; FIXME: We should change the select operand types to scalars, but we need to make
; sure the backend can reverse that transform if needed.
; Scalar float is bitcast to <4 x i8>, selected against a <4 x i8> argument
; with a scalar condition, and bitcast back to float. The expected output has
; the same three instructions as the input.
define float @bitcast_scalar_select_type_mismatch1(float %x, <4 x i8> %y, i1 %cmp) {
; CHECK-LABEL: @bitcast_scalar_select_type_mismatch1(
; CHECK-NEXT: [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
; CHECK-NEXT: [[T6:%.*]] = select i1 [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[Y:%.*]]
; CHECK-NEXT: [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
; CHECK-NEXT: ret float [[T7]]
;
%t4 = bitcast float %x to <4 x i8>
%t6 = select i1 %cmp, <4 x i8> %t4, <4 x i8> %y
%t7 = bitcast <4 x i8> %t6 to float
ret float %t7
}
; FIXME: We should change the select operand types to vectors, but we need to make
; sure the backend can reverse that transform if needed.
; <4 x i8> is bitcast to float, selected against a float argument with a
; scalar condition, and bitcast back to <4 x i8>. The expected output has the
; same three instructions as the input.
define <4 x i8> @bitcast_scalar_select_type_mismatch2(<4 x i8> %x, float %y, i1 %cmp) {
; CHECK-LABEL: @bitcast_scalar_select_type_mismatch2(
; CHECK-NEXT: [[T4:%.*]] = bitcast <4 x i8> [[X:%.*]] to float
; CHECK-NEXT: [[T6:%.*]] = select i1 [[CMP:%.*]], float [[T4]], float [[Y:%.*]]
; CHECK-NEXT: [[T7:%.*]] = bitcast float [[T6]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[T7]]
;
%t4 = bitcast <4 x i8> %x to float
%t6 = select i1 %cmp, float %t4, float %y
%t7 = bitcast float %t6 to <4 x i8>
ret <4 x i8> %t7
}
; Scalar-condition select between two vectors bitcast to <4 x i32>, with the
; result bitcast to <4 x float>. The expected output selects directly in
; <4 x float>: %x is used as-is and only %y needs a bitcast.
define <4 x float> @bitcast_scalar_select_of_vectors(<4 x float> %x, <2 x i64> %y, i1 %cmp) {
; CHECK-LABEL: @bitcast_scalar_select_of_vectors(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
; CHECK-NEXT: [[T7:%.*]] = select i1 [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t4 = bitcast <4 x float> %x to <4 x i32>
%t5 = bitcast <2 x i64> %y to <4 x i32>
%t6 = select i1 %cmp, <4 x i32> %t4, <4 x i32> %t5
%t7 = bitcast <4 x i32> %t6 to <4 x float>
ret <4 x float> %t7
}
; Can't change the type of the vector select if the dest type is scalar.
; Vector-condition select on <4 x i8> whose result is bitcast to a scalar
; float. The expected output has the same four instructions as the input.
define float @bitcast_vector_select_no_fold1(float %x, <2 x i16> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @bitcast_vector_select_no_fold1(
; CHECK-NEXT: [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
; CHECK-NEXT: [[T5:%.*]] = bitcast <2 x i16> [[Y:%.*]] to <4 x i8>
; CHECK-NEXT: [[T6:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[T5]]
; CHECK-NEXT: [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
; CHECK-NEXT: ret float [[T7]]
;
%t4 = bitcast float %x to <4 x i8>
%t5 = bitcast <2 x i16> %y to <4 x i8>
%t6 = select <4 x i1> %cmp, <4 x i8> %t4, <4 x i8> %t5
%t7 = bitcast <4 x i8> %t6 to float
ret float %t7
}
; Can't change the type of the vector select if the number of elements in the dest type is not the same.
; Vector-condition select on <8 x i8> (8 lanes) whose result is bitcast to
; <2 x float> (2 lanes). The expected output has the same four instructions as
; the input.
define <2 x float> @bitcast_vector_select_no_fold2(<2 x float> %x, <4 x i16> %y, <8 x i1> %cmp) {
; CHECK-LABEL: @bitcast_vector_select_no_fold2(
; CHECK-NEXT: [[T4:%.*]] = bitcast <2 x float> [[X:%.*]] to <8 x i8>
; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x i16> [[Y:%.*]] to <8 x i8>
; CHECK-NEXT: [[T6:%.*]] = select <8 x i1> [[CMP:%.*]], <8 x i8> [[T4]], <8 x i8> [[T5]]
; CHECK-NEXT: [[T7:%.*]] = bitcast <8 x i8> [[T6]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[T7]]
;
%t4 = bitcast <2 x float> %x to <8 x i8>
%t5 = bitcast <4 x i16> %y to <8 x i8>
%t6 = select <8 x i1> %cmp, <8 x i8> %t4, <8 x i8> %t5
%t7 = bitcast <8 x i8> %t6 to <2 x float>
ret <2 x float> %t7
}
; Optimize bitcasts that are extracting low element of vector. This happens because of SRoA.
; rdar://7892780
; Takes the low 32 bits of each 64-bit vector argument via bitcast-to-i64,
; trunc to i32 and bitcast-to-float, then adds the two floats. The expected
; output uses extractelement at index 0 instead: directly on %A, and on %B
; after bitcasting it to <2 x float>.
define float @test2(<2 x float> %A, <2 x i32> %B) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 0
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
; CHECK-NEXT: ret float [[ADD]]
;
%tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
%tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
%tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]
%tmp = bitcast <2 x i32> %B to i64
%tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
%tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
%add = fadd float %tmp24, %tmp4
ret float %add
}
; Optimize bitcasts that are extracting other elements of a vector. This happens
; because of SRoA.
; rdar://7892780
; Same idea as the low-element case, but a logical shift right selects a higher
; 32-bit piece before the trunc: a shift by 32 of %A is expected to become an
; extract of element 1, and a shift by 64 of the 128-bit %B is expected to become
; an extract of element 2 of %B viewed as <4 x float>.
define float @test3(<2 x float> %A, <2 x i64> %B) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 2
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
; CHECK-NEXT: ret float [[ADD]]
;
%tmp28 = bitcast <2 x float> %A to i64
%tmp29 = lshr i64 %tmp28, 32
%tmp23 = trunc i64 %tmp29 to i32
%tmp24 = bitcast i32 %tmp23 to float
%tmp = bitcast <2 x i64> %B to i128
%tmp1 = lshr i128 %tmp, 64
%tmp2 = trunc i128 %tmp1 to i32
%tmp4 = bitcast i32 %tmp2 to float
%add = fadd float %tmp24, %tmp4
ret float %add
}
; Both bitcasts are unnecessary; change the extractelement.
; The float vector is cast to an i32 vector, one lane is extracted, and the lane is
; cast back to float; the expected output extracts the float lane directly.
define float @bitcast_extelt1(<2 x float> %A) {
; CHECK-LABEL: @bitcast_extelt1(
; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
; CHECK-NEXT: ret float [[BC2]]
;
%bc1 = bitcast <2 x float> %A to <2 x i32>
%ext = extractelement <2 x i32> %bc1, i32 0
%bc2 = bitcast i32 %ext to float
ret float %bc2
}
; Second bitcast can be folded into the first.
; The scalar double-to-i64 cast after the extract is expected to be absorbed by
; casting the source vector straight to <2 x i64> and extracting lane 1 from that.
define i64 @bitcast_extelt2(<4 x float> %A) {
; CHECK-LABEL: @bitcast_extelt2(
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x i64> [[BC]], i32 1
; CHECK-NEXT: ret i64 [[BC2]]
;
%bc1 = bitcast <4 x float> %A to <2 x double>
%ext = extractelement <2 x double> %bc1, i32 1
%bc2 = bitcast double %ext to i64
ret i64 %bc2
}
; TODO: This should return %A.
; The vector is cast to <1 x i64>, its only lane is extracted, and the lane is cast
; back to the original vector type. The assertions record the current behavior, in
; which all three instructions are kept.
define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
; CHECK-LABEL: @bitcast_extelt3(
; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64>
; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i32 0
; CHECK-NEXT: [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[BC2]]
;
%bc1 = bitcast <2 x i32> %A to <1 x i64>
%ext = extractelement <1 x i64> %bc1, i32 0
%bc2 = bitcast i64 %ext to <2 x i32>
ret <2 x i32> %bc2
}
; Handle the case where the input is not a vector.
; The source is a scalar i128; the trailing i64-to-double cast is expected to be
; folded by casting the i128 directly to <2 x double> and extracting lane 0.
define double @bitcast_extelt4(i128 %A) {
; CHECK-LABEL: @bitcast_extelt4(
; CHECK-NEXT: [[BC:%.*]] = bitcast i128 [[A:%.*]] to <2 x double>
; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x double> [[BC]], i32 0
; CHECK-NEXT: ret double [[BC2]]
;
%bc1 = bitcast i128 %A to <2 x i64>
%ext = extractelement <2 x i64> %bc1, i32 0
%bc2 = bitcast i64 %ext to double
ret double %bc2
}
; Two i32 values are packed into an i64 with zext/shl/or (%A in the low half, %B
; shifted left by 32) and the result is bitcast to <2 x i32>. The expected output
; builds the vector with two insertelements instead: %A at index 0, %B at index 1.
define <2 x i32> @test4(i32 %A, i32 %B){
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%tmp38 = zext i32 %A to i64
%tmp32 = zext i32 %B to i64
%tmp33 = shl i64 %tmp32, 32
%ins35 = or i64 %tmp33, %tmp38
%tmp43 = bitcast i64 %ins35 to <2 x i32>
ret <2 x i32> %tmp43
}
; rdar://8360454
; Same zext/shl/or packing as the integer case, but the scalars start as floats
; that are first bitcast to i32 and the packed i64 is bitcast to <2 x float>. The
; expected output inserts the original floats directly (%A at 0, %B at 1).
define <2 x float> @test5(float %A, float %B) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B:%.*]], i32 1
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
%tmp37 = bitcast float %A to i32
%tmp38 = zext i32 %tmp37 to i64
%tmp31 = bitcast float %B to i32
%tmp32 = zext i32 %tmp31 to i64
%tmp33 = shl i64 %tmp32, 32
%ins35 = or i64 %tmp33, %tmp38
%tmp43 = bitcast i64 %ins35 to <2 x float>
ret <2 x float> %tmp43
}
; One half of the packed i64 is a constant: %A is shifted into the upper 32 bits and
; the integer constant 1109917696 is or'ed into the lower bits. The expected output
; is a single insertelement of %A at index 1 into a constant vector whose lane 0
; is the float 4.2e+01.
define <2 x float> @test6(float %A){
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float 4.200000e+01, float undef>, float [[A:%.*]], i32 1
; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
%tmp23 = bitcast float %A to i32
%tmp24 = zext i32 %tmp23 to i64
%tmp25 = shl i64 %tmp24, 32
%mask20 = or i64 %tmp25, 1109917696
%tmp35 = bitcast i64 %mask20 to <2 x float>
ret <2 x float> %tmp35
}
; The mask operand is a constant expression: an all-ones <4 x i16> vector bitcast to
; i64 and xor'ed with -1. The whole 'and' is expected to fold to the constant 0.
define i64 @ISPC0(i64 %in) {
; CHECK-LABEL: @ISPC0(
; CHECK-NEXT: ret i64 0
;
%out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1> to i64), i64 -1)
ret i64 %out
}
; The mask operand is an all-zero <4 x i16> vector bitcast to i64 and xor'ed with 0.
; The 'and' is expected to fold to the constant 0.
define i64 @Vec2(i64 %in) {
; CHECK-LABEL: @Vec2(
; CHECK-NEXT: ret i64 0
;
%out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 0> to i64), i64 0)
ret i64 %out
}
; The mask is i64 -1 round-tripped through a <2 x float> bitcast constant expression
; and then xor'ed with -1. The 'and' is expected to fold to the constant 0.
define i64 @All11(i64 %in) {
; CHECK-LABEL: @All11(
; CHECK-NEXT: ret i64 0
;
%out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
ret i64 %out
}
; 32-bit variant of the round-trip case: i32 -1 is bitcast to <1 x float> and back,
; then xor'ed with -1. The 'and' is expected to fold to the constant 0.
define i32 @All111(i32 %in) {
; CHECK-LABEL: @All111(
; CHECK-NEXT: ret i32 0
;
%out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
ret i32 %out
}
; A scalar is inserted into a single-element poison vector and the vector is then
; bitcast to <2 x i16>. The expected output bitcasts the scalar directly.
define <2 x i16> @BitcastInsert(i32 %a) {
; CHECK-LABEL: @BitcastInsert(
; CHECK-NEXT: [[R:%.*]] = bitcast i32 [[A:%.*]] to <2 x i16>
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%v = insertelement <1 x i32> poison, i32 %a, i32 0
%r = bitcast <1 x i32> %v to <2 x i16>
ret <2 x i16> %r
}
; PR17293
; A pointer to a vector of pointers is bitcast to a pointer to <2 x i64> and loaded.
; The assertions expect the bitcast and the load to be left as written.
define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i8*>* [[ARG:%.*]] to <2 x i64>*
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[CAST]], align 16
; CHECK-NEXT: ret <2 x i64> [[LOAD]]
;
%cast = bitcast <2 x i8*>* %arg to <2 x i64>*
%load = load <2 x i64>, <2 x i64>* %cast, align 16
ret <2 x i64> %load
}
; Constant folding of a bitcast from a constant <8 x i1> vector to i8. The expected
; result is the scalar constant -85.
define i8 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: ret i8 -85
;
%res = bitcast <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true> to i8
ret i8 %res
}
; Global whose address is used below to form a vector constant that cannot be
; reduced to a plain numeric literal.
@g = internal unnamed_addr global i32 undef
; Constant folding of bitcasts from constant integer/float vectors to double.
; Each volatile store writes one bitcast constant expression; the stores are volatile
; so that every one of them appears in the output. All are expected to fold to a
; double literal except the one containing ptrtoint of @g, which is expected to
; stay as a constant expression. The assertions also expect an explicit 'align 8'
; on every store.
define void @constant_fold_vector_to_double() {
; CHECK-LABEL: @constant_fold_vector_to_double(
; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0xFFFFFFFFFFFFFFFF, double* undef, align 8
; CHECK-NEXT: store volatile double 0x162E000004D2, double* undef, align 8
; CHECK-NEXT: store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef, align 8
; CHECK-NEXT: store volatile double 0x400000003F800000, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
; CHECK-NEXT: ret void
;
store volatile double bitcast (<1 x i64> <i64 4607182418800017408> to double), double* undef
store volatile double bitcast (<2 x i32> <i32 0, i32 1072693248> to double), double* undef
store volatile double bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 16368> to double), double* undef
store volatile double bitcast (<8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 240, i8 63> to double), double* undef
store volatile double bitcast (<2 x i32> <i32 -1, i32 -1> to double), double* undef
store volatile double bitcast (<2 x i32> <i32 1234, i32 5678> to double), double* undef
store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef
store volatile double bitcast (<2 x float> <float 1.0, float 2.0> to double), double* undef
store volatile double bitcast (<2 x i32> zeroinitializer to double), double* undef
store volatile double bitcast (<4 x i16> zeroinitializer to double), double* undef
store volatile double bitcast (<8 x i8> zeroinitializer to double), double* undef
store volatile double bitcast (<16 x i4> zeroinitializer to double), double* undef
store volatile double bitcast (<32 x i2> zeroinitializer to double), double* undef
store volatile double bitcast (<64 x i1> zeroinitializer to double), double* undef
ret void
}
; Constant folding of bitcasts from constant integer vectors of several element
; widths (i32, i16, i8, i1) to float. Every volatile store is expected to end up
; storing the literal 1.0.
define void @constant_fold_vector_to_float() {
; CHECK-LABEL: @constant_fold_vector_to_float(
; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
; CHECK-NEXT: ret void
;
store volatile float bitcast (<1 x i32> <i32 1065353216> to float), float* undef
store volatile float bitcast (<2 x i16> <i16 0, i16 16256> to float), float* undef
store volatile float bitcast (<4 x i8> <i8 0, i8 0, i8 128, i8 63> to float), float* undef
store volatile float bitcast (<32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0> to float), float* undef
ret void
}
; Constant folding of bitcasts from constant <2 x i8> and <4 x i4> vectors to half.
; Both volatile stores are expected to end up storing the literal 0xH4000.
define void @constant_fold_vector_to_half() {
; CHECK-LABEL: @constant_fold_vector_to_half(
; CHECK-NEXT: store volatile half 0xH4000, half* undef, align 2
; CHECK-NEXT: store volatile half 0xH4000, half* undef, align 2
; CHECK-NEXT: ret void
;
store volatile half bitcast (<2 x i8> <i8 0, i8 64> to half), half* undef
store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
ret void
}
; Ensure that we do not crash when looking at such a weird bitcast.
; A one-element vector of pointers is bitcast to a scalar pointer; the expected
; output replaces the bitcast with an extractelement of lane 0.
define i8* @bitcast_from_single_element_pointer_vector_to_pointer(<1 x i8*> %ptrvec) {
; CHECK-LABEL: @bitcast_from_single_element_pointer_vector_to_pointer(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i8*> [[PTRVEC:%.*]], i32 0
; CHECK-NEXT: ret i8* [[TMP1]]
;
%ptr = bitcast <1 x i8*> %ptrvec to i8*
ret i8* %ptr
}

View File

@ -0,0 +1,167 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Bitcast from a one-element integer vector to a scalar of a different element
; type. Expected output: a vector-to-vector bitcast to <1 x double> followed by an
; extractelement of lane 0.
define double @a(<1 x i64> %y) {
; CHECK-LABEL: @a(
; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> [[Y:%.*]] to <1 x double>
; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
; CHECK-NEXT: ret double [[C]]
;
%c = bitcast <1 x i64> %y to double
ret double %c
}
; Bitcast from a one-element vector to its own element type. Expected output: a
; plain extractelement of lane 0.
define i64 @b(<1 x i64> %y) {
; CHECK-LABEL: @b(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0
; CHECK-NEXT: ret i64 [[TMP1]]
;
%c = bitcast <1 x i64> %y to i64
ret i64 %c
}
; Bitcast from a scalar double to a one-element vector of a different element type.
; The assertions expect the bitcast to be left unchanged.
define <1 x i64> @c(double %y) {
; CHECK-LABEL: @c(
; CHECK-NEXT: [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64>
; CHECK-NEXT: ret <1 x i64> [[C]]
;
%c = bitcast double %y to <1 x i64>
ret <1 x i64> %c
}
; Bitcast from a scalar to a one-element vector of the same element type. Expected
; output: an insertelement of the scalar into lane 0 of an undef vector.
define <1 x i64> @d(i64 %y) {
; CHECK-LABEL: @d(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0
; CHECK-NEXT: ret <1 x i64> [[TMP1]]
;
%c = bitcast i64 %y to <1 x i64>
ret <1 x i64> %c
}
; Bitcast from a one-element vector to x86_mmx. Expected output: extract the i64
; lane first, then bitcast that scalar to x86_mmx.
define x86_mmx @e(<1 x i64> %y) {
; CHECK-LABEL: @e(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0
; CHECK-NEXT: [[C:%.*]] = bitcast i64 [[TMP1]] to x86_mmx
; CHECK-NEXT: ret x86_mmx [[C]]
;
%c = bitcast <1 x i64> %y to x86_mmx
ret x86_mmx %c
}
; Bitcast from x86_mmx to a one-element vector. Expected output: bitcast to the
; scalar i64 first, then insertelement into lane 0 of an undef vector.
define <1 x i64> @f(x86_mmx %y) {
; CHECK-LABEL: @f(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] to i64
; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
; CHECK-NEXT: ret <1 x i64> [[C]]
;
%c = bitcast x86_mmx %y to <1 x i64>
ret <1 x i64> %c
}
; Chain of two bitcasts, x86_mmx to <1 x i64> to double. Expected output: one
; direct bitcast from x86_mmx to double.
define double @g(x86_mmx %x) {
; CHECK-LABEL: @g(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
%0 = bitcast x86_mmx %x to <1 x i64>
%1 = bitcast <1 x i64> %0 to double
ret double %1
}
; FP source is ok.
; A scalar bitcast feeds an insertelement into a poison vector at a variable index.
; Expected output: insert the original double into a <3 x double> vector and
; bitcast the whole vector afterwards.
define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64>
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast double %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}
; Integer source is ok; index is anything.
; The index here has the unusual type i567. Expected output: insert the original
; i32 into a <3 x i32> vector with that same index, then bitcast the vector to
; <3 x float>.
define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_fp(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
; CHECK-NEXT: ret <3 x float> [[I]]
;
%xb = bitcast i32 %x to float
%i = insertelement <3 x float> poison, float %xb, i567 %idx
ret <3 x float> %i
}
; Scalable-vector variant of the integer-source case. Expected output: the insert
; is done on <vscale x 3 x i32> and the vector is bitcast to <vscale x 3 x float>.
define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_vscale(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float>
; CHECK-NEXT: ret <vscale x 3 x float> [[I]]
;
%xb = bitcast i32 %x to float
%i = insertelement <vscale x 3 x float> poison, float %xb, i567 %idx
ret <vscale x 3 x float> %i
}
; External function used only to give the scalar bitcast a second user.
declare void @use(i64)
; Negative test - extra use prevents canonicalization
; The scalar bitcast result is also passed to @use, so the bitcast, the call and
; the insertelement into poison are all expected to remain as written.
define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_extra_use(
; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64
; CHECK-NEXT: call void @use(i64 [[XB]])
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast double %x to i64
call void @use(i64 %xb)
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}
; Negative test - source type must be scalar
; The value being bitcast to i64 is a <2 x i32> vector, so the bitcast and the
; insertelement into poison are expected to remain as written.
define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_vec_src(
; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast <2 x i32> %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}
; Negative test - source type must be scalar
; Here the value being bitcast to i64 has type x86_mmx; the bitcast and the
; insertelement into poison are expected to remain as written.
; NOTE(review): the heading above is shared with the vector-source test; presumably
; x86_mmx is rejected because it is not an integer/FP scalar - confirm against the
; transform's type check.
define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_from_mmx(
; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast x86_mmx %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}
; Reduce number of casts
; Two doubles are each bitcast to i64 and inserted into lanes 0 and 1 of a vector.
; Expected output: insert the doubles into a <2 x double> vector and apply a single
; vector bitcast to <2 x i64> at the end.
define <2 x i64> @PR45748(double %x, double %y) {
; CHECK-LABEL: @PR45748(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1
; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[I1]]
;
%xb = bitcast double %x to i64
%i0 = insertelement <2 x i64> poison, i64 %xb, i32 0
%yb = bitcast double %y to i64
%i1 = insertelement <2 x i64> %i0, i64 %yb, i32 1
ret <2 x i64> %i1
}

View File

@ -0,0 +1,179 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; The same scalar is inserted into lanes 0, 1, 2, 3 in order, starting from poison.
; Expected output: keep the first insert at lane 0 and replace the rest with a
; shufflevector whose mask is all zeros (a splat of lane 0).
define <4 x float> @good1(float %arg) {
; CHECK-LABEL: @good1(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; All four lanes receive the same scalar, but the inserts are in the order 1, 2, 0, 3
; so the first insert is not at lane 0. Expected output: a new insert at lane 0 of
; an undef vector followed by a splat shufflevector.
define <4 x float> @good2(float %arg) {
; CHECK-LABEL: @good2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 1
%t4 = insertelement <4 x float> %t, float %arg, i32 2
%t5 = insertelement <4 x float> %t4, float %arg, i32 0
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; The insert chain starts from a zeroinitializer vector rather than poison, but all
; four lanes are overwritten with the same scalar. Expected output: an insert at
; lane 0 of an undef vector followed by a splat shufflevector.
define <4 x float> @good3(float %arg) {
; CHECK-LABEL: @good3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; Same four-lane splat chain as the zeroinitializer-based case, followed by an fadd
; of the splat with itself. Expected output: the fadd is performed on the
; single-insert vector and the splat shufflevector is applied to the fadd result.
define <4 x float> @good4(float %arg) {
; CHECK-LABEL: @good4(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[T]], [[T]]
; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
%t7 = fadd <4 x float> %t6, %t6
ret <4 x float> %t7
}
; The first insert (lane 0) has an additional user, the fadd %a1, before the chain
; continues through lanes 1, 2, 3. Expected output: the first insert and its fadd
; are kept, and the remaining inserts become a splat shufflevector of the first
; insert.
define <4 x float> @good5(float %v) {
; CHECK-LABEL: @good5(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT: [[INS4:%.*]] = shufflevector <4 x float> [[INS1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%ins1 = insertelement <4 x float> poison, float %v, i32 0
%a1 = fadd <4 x float> %ins1, %ins1
%ins2 = insertelement<4 x float> %ins1, float %v, i32 1
%ins3 = insertelement<4 x float> %ins2, float %v, i32 2
%ins4 = insertelement<4 x float> %ins3, float %v, i32 3
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}
; The insert is changed to allow the canonical shuffle-splat pattern from element 0.
; Lane 1 is written twice and lane 0 is never written. Expected output: an insert
; at lane 0 of an undef vector and a shufflevector whose mask leaves lane 0 undef
; and selects element 0 for lanes 1-3.
define <4 x float> @splat_undef1(float %arg) {
; CHECK-LABEL: @splat_undef1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 1
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; Re-uses the existing first insertelement.
; Lanes 0, 2 and 3 are written and lane 1 is never written. Expected output: the
; original lane-0 insert is kept and a shufflevector selects element 0 for lanes
; 0, 2 and 3 while leaving lane 1 undef in the mask.
define <4 x float> @splat_undef2(float %arg) {
; CHECK-LABEL: @splat_undef2(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t5 = insertelement <4 x float> %t, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; Negative test: lane 1 receives a different scalar (%arg2) than the other lanes,
; so this is not a splat. The four inserts are expected to remain as written.
define <4 x float> @bad3(float %arg, float %arg2) {
; CHECK-LABEL: @bad3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG2:%.*]], i32 1
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg2, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; Negative test: a single insert into a one-element vector. The insertelement is
; expected to remain as written (no shufflevector is introduced).
define <1 x float> @bad4(float %arg) {
; CHECK-LABEL: @bad4(
; CHECK-NEXT: [[T:%.*]] = insertelement <1 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: ret <1 x float> [[T]]
;
%t = insertelement <1 x float> poison, float %arg, i32 0
ret <1 x float> %t
}
; Multiple undef elements are ok.
; TODO: Multiple uses triggers the transform at %t4, but we should sink/scalarize/CSE the splats?
; The partial chain %t4 (lanes 0 and 1 written) is used by the fadd in addition to
; the full chain %t6. Expected output: two shufflevectors of the first insert, one
; with mask <0, 0, undef, undef> for %t4 and one full splat for %t6.
define <4 x float> @splat_undef3(float %arg) {
; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
%t7 = fadd <4 x float> %t6, %t4
ret <4 x float> %t7
}
; Negative test: one of the inserts uses a variable lane index (%k) instead of a
; constant. The four inserts are expected to remain as written.
define <4 x float> @bad6(float %arg, i32 %k) {
; CHECK-LABEL: @bad6(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 [[K:%.*]]
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 %k
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}
; Negative test: the first insert is at lane 1 (not lane 0) and has an additional
; user, the fadd %a1. The assertions expect the whole chain of inserts and both
; fadds to remain as written.
define <4 x float> @bad7(float %v) {
; CHECK-LABEL: @bad7(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 1
; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[V]], i32 2
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[V]], i32 3
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x float> [[INS3]], float [[V]], i32 0
; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%ins1 = insertelement <4 x float> poison, float %v, i32 1
%a1 = fadd <4 x float> %ins1, %ins1
%ins2 = insertelement<4 x float> %ins1, float %v, i32 2
%ins3 = insertelement<4 x float> %ins2, float %v, i32 3
%ins4 = insertelement<4 x float> %ins3, float %v, i32 0
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}

View File

@ -0,0 +1,332 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE
; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE
; Extract with a constant index (16) that is past the end of a two-element vector.
; Expected output on both data layouts: return undef.
define i32 @extractelement_out_of_range(<2 x i32> %x) {
; ANY-LABEL: @extractelement_out_of_range(
; ANY-NEXT: ret i32 undef
;
%E1 = extractelement <2 x i32> %x, i8 16
ret i32 %E1
}
; Extract with an in-range index value (0) carried in an unusually wide index type
; (i128). The extractelement is expected to remain as written on both data layouts.
define i32 @extractelement_type_out_of_range(<2 x i32> %x) {
; ANY-LABEL: @extractelement_type_out_of_range(
; ANY-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i128 0
; ANY-NEXT: ret i32 [[E1]]
;
%E1 = extractelement <2 x i32> %x, i128 0
ret i32 %E1
}
; A scalar is inserted at lane 0, the vector is bitcast to a vector with the same
; number of elements, and lane 0 is extracted. Expected output on both data
; layouts: a scalar bitcast of the original value.
define i32 @bitcasted_inselt_equal_num_elts(float %f) {
; ANY-LABEL: @bitcasted_inselt_equal_num_elts(
; ANY-NEXT: [[R:%.*]] = bitcast float [[F:%.*]] to i32
; ANY-NEXT: ret i32 [[R]]
;
%vf = insertelement <4 x float> poison, float %f, i32 0
%vi = bitcast <4 x float> %vf to <4 x i32>
%r = extractelement <4 x i32> %vi, i32 0
ret i32 %r
}
; A scalar is splatted, a constant vector <0, 1, ..., 7> is added, and lane 0 is
; extracted. Lane 0 of the constant is 0, and the expected output simply returns
; the original scalar.
define i64 @test2(i64 %in) {
; ANY-LABEL: @test2(
; ANY-NEXT: ret i64 [[IN:%.*]]
;
%vec = insertelement <8 x i64> poison, i64 %in, i32 0
%splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
%add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%r = extractelement <8 x i64> %add, i32 0
ret i64 %r
}
; An i64 is inserted at lane 0, the vector is bitcast to narrower i32 lanes, and
; narrow lane 0 is extracted. Expected output depends on the data layout: on
; little-endian a plain trunc of %x; on big-endian a shift right by 32 followed
; by the trunc.
define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 0
ret i32 %r
}
; An i64 is inserted at lane 1 and i16 lane 4 of the bitcast vector is extracted
; (the first narrow lane that falls inside wide lane 1). Expected output: on
; little-endian a plain trunc; on big-endian a shift right by 48 and then the trunc.
define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i16
; LE-NEXT: ret i16 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16
; BE-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 1
%b = bitcast <2 x i64> %i to <8 x i16>
%r = extractelement <8 x i16> %b, i32 4
ret i16 %r
}
; An i64 is inserted at lane 0 and i32 lane 1 of the bitcast vector is extracted
; (the second half of wide lane 0). Expected output: on little-endian a shift
; right by 32 and a trunc; on big-endian a plain trunc.
define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
; BE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 1
ret i32 %r
}
; An i32 is inserted at lane 0 and i8 lane 2 of the bitcast vector is extracted, so
; the extracted piece is a quarter of the source rather than a half. Expected
; output: a shift right by 16 on little-endian or by 8 on big-endian, followed by
; a trunc to i8.
define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
; LE-NEXT: ret i8 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
; BE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> poison, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>
%r = extractelement <8 x i8> %b, i32 2
ret i8 %r
}
; Same pattern with non-power-of-2 integer widths: an i15 is inserted at lane 0 of
; <3 x i15>, the vector is bitcast to <15 x i3>, and narrow lane 1 is extracted.
; Expected output: a shift right by 3 on little-endian or by 9 on big-endian,
; followed by a trunc to i3.
define i3 @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
; LE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3
; LE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
; LE-NEXT: ret i3 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
; BE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9
; BE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
; BE-NEXT: ret i3 [[R]]
;
%i = insertelement <3 x i15> poison, i15 %x, i32 0
%b = bitcast <3 x i15> %i to <15 x i3>
%r = extractelement <15 x i3> %b, i32 1
ret i3 %r
}
; Negative test for the above fold, but we can remove the insert here.
; The scalar is inserted at lane 1 of an existing vector %v and i8 lane 2 of the
; bitcast is extracted. Expected output on both data layouts: the insertelement is
; dropped and the extract reads from the bitcast of %v directly.
define i8 @bitcasted_inselt_wide_source_wrong_insert(<2 x i32> %v, i32 %x) {
; ANY-LABEL: @bitcasted_inselt_wide_source_wrong_insert(
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2
; ANY-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> %v, i32 %x, i32 1
%b = bitcast <2 x i32> %i to <8 x i8>
%r = extractelement <8 x i8> %b, i32 2
ret i8 %r
}
; Partial negative test for the above fold, extra uses are not allowed if shift is needed.
; @use gives the bitcast vector a second user.
declare void @use(<8 x i8>)
; i8 lane 3 of the bitcast is extracted while the bitcast is also passed to @use.
; Expected output: on little-endian the extractelement is kept as written; on
; big-endian the extract becomes a plain trunc of %x (no shift), while the insert,
; bitcast and call remain.
define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_uses(
; LE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; LE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
; LE-NEXT: call void @use(<8 x i8> [[B]])
; LE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
; LE-NEXT: ret i8 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_uses(
; BE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; BE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
; BE-NEXT: call void @use(<8 x i8> [[B]])
; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i8
; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> poison, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>
call void @use(<8 x i8> %b)
%r = extractelement <8 x i8> %b, i32 3
ret i8 %r
}
; Integer source, floating-point destination: an i64 is inserted at lane 0, the
; vector is bitcast to <4 x float>, and float lane 1 is extracted. Expected
; output: on little-endian shift right by 32, trunc to i32, bitcast to float; on
; big-endian trunc to i32 and bitcast to float with no shift.
define float @bitcasted_inselt_to_FP(i64 %x) {
; LE-LABEL: @bitcasted_inselt_to_FP(
; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float
; LE-NEXT: ret float [[R]]
;
; BE-LABEL: @bitcasted_inselt_to_FP(
; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; BE-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float
; BE-NEXT: ret float [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}
; External functions used to give the intermediate vectors an additional user in
; the integer-to-FP tests.
declare void @use_v2i128(<2 x i128>)
declare void @use_v8f32(<8 x float>)
; Integer-to-FP variant in which the insertelement result is also passed to
; @use_v2i128. The whole sequence is expected to remain as written on both data
; layouts.
define float @bitcasted_inselt_to_FP_uses(i128 %x) {
; ANY-LABEL: @bitcasted_inselt_to_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2i128(<2 x i128> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x i128> poison, i128 %x, i32 0
call void @use_v2i128(<2 x i128> %i)
%b = bitcast <2 x i128> %i to <8 x float>
%r = extractelement <8 x float> %b, i32 1
ret float %r
}
; Integer-to-FP variant in which the bitcast result (rather than the insert) has an
; additional user, @use_v8f32. The whole sequence is expected to remain as written
; on both data layouts.
define float @bitcasted_inselt_to_FP_uses2(i128 %x) {
; ANY-LABEL: @bitcasted_inselt_to_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
; ANY-NEXT: call void @use_v8f32(<8 x float> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x i128> poison, i128 %x, i32 0
%b = bitcast <2 x i128> %i to <8 x float>
call void @use_v8f32(<8 x float> %b)
%r = extractelement <8 x float> %b, i32 1
ret float %r
}
; Floating-point source, integer destination: a double is inserted at lane 0, the
; vector is bitcast to <4 x i32>, and i32 lane 1 is extracted. Expected output:
; the double is bitcast to i64, then on little-endian shifted right by 32 and
; truncated, or on big-endian truncated directly.
define i32 @bitcasted_inselt_from_FP(double %x) {
; LE-LABEL: @bitcasted_inselt_from_FP(
; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_from_FP(
; BE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 1
ret i32 %r
}
; External functions used to give the intermediate vectors an additional user in
; the FP-source tests.
declare void @use_v2f64(<2 x double>)
declare void @use_v8i16(<8 x i16>)
; FP-to-integer variant in which the insertelement result is also passed to
; @use_v2f64. The whole sequence is expected to remain as written on both data
; layouts.
define i16 @bitcasted_inselt_from_FP_uses(double %x) {
; ANY-LABEL: @bitcasted_inselt_from_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
; ANY-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
call void @use_v2f64(<2 x double> %i)
%b = bitcast <2 x double> %i to <8 x i16>
%r = extractelement <8 x i16> %b, i32 1
ret i16 %r
}
; FP-to-integer variant in which the bitcast result has an additional user,
; @use_v8i16. The whole sequence is expected to remain as written on both data
; layouts.
define i16 @bitcasted_inselt_from_FP_uses2(double %x) {
; ANY-LABEL: @bitcasted_inselt_from_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
; ANY-NEXT: call void @use_v8i16(<8 x i16> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
; ANY-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <8 x i16>
call void @use_v8i16(<8 x i16> %b)
%r = extractelement <8 x i16> %b, i32 1
ret i16 %r
}
; Both the inserted scalar (double) and the extracted lane (float) are
; floating-point. The insert, bitcast and extract are expected to remain as
; written on both data layouts.
define float @bitcasted_inselt_to_and_from_FP(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}
; FP-to-FP variant in which the insertelement result is also passed to @use_v2f64.
; The whole sequence is expected to remain as written on both data layouts.
define float @bitcasted_inselt_to_and_from_FP_uses(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
call void @use_v2f64(<2 x double> %i)
%b = bitcast <2 x double> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}
; External function used to give the bitcast vector an additional user.
declare void @use_v4f32(<4 x float>)
; FP-to-FP variant in which the bitcast result is also passed to @use_v4f32. The
; whole sequence is expected to remain as written on both data layouts.
define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: call void @use_v4f32(<4 x float> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x float>
call void @use_v4f32(<4 x float> %b)
%r = extractelement <4 x float> %b, i32 1
ret float %r
}
; This would crash/assert because the logic for collectShuffleElements()
; does not consider the possibility of invalid insert/extract operands.
; The second extract from %a uses index 4, which is out of range for a two-element
; vector. In the expected output the valid extract/insert pair becomes two
; shufflevectors, while the out-of-range extract shows up as an insertelement of
; an undef double at lane 0.
define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, double* %p) {
; ANY-LABEL: @invalid_extractelement(
; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 undef, i32 1, i32 4, i32 3>
; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i32 1
; ANY-NEXT: store double [[E]], double* [[P:%.*]], align 8
; ANY-NEXT: [[R:%.*]] = insertelement <4 x double> [[T4]], double undef, i64 0
; ANY-NEXT: ret <4 x double> [[R]]
;
%t3 = extractelement <2 x double> %a, i32 0
%t4 = insertelement <4 x double> %b, double %t3, i32 2
%e = extractelement <4 x double> %t4, i32 1
store double %e, double* %p
%e1 = extractelement <2 x double> %a, i32 4 ; invalid index
%r = insertelement <4 x double> %t4, double %e1, i64 0
ret <4 x double> %r
}

View File

@ -0,0 +1,35 @@
; RUN: opt < %s -instcombine -S | not grep zeroinitializer
; Loop that loads a <2 x double> from address %s0 + %B, merges its bits with
; those of an all-zero vector through two complementary bit masks, and stores
; the result to address %s0 + %A. The RUN line pipes the opt output into
; `not grep`, so the test passes only when the all-zero vector constant no
; longer appears in the optimized output.
define void @foo(i64 %A, i64 %B) {
bb8:
br label %bb30
bb30:
; %s0 is the address offset (advanced by 16 each iteration); %l0 is the loop
; counter, starting at -2222 and incremented by 1.
%s0 = phi i64 [ 0, %bb8 ], [ %r21, %bb30 ]
%l0 = phi i64 [ -2222, %bb8 ], [ %r23, %bb30 ]
%r2 = add i64 %s0, %B
%r3 = inttoptr i64 %r2 to <2 x double>*
%r4 = load <2 x double>, <2 x double>* %r3, align 8
%r6 = bitcast <2 x double> %r4 to <2 x i64>
%r7 = bitcast <2 x double> zeroinitializer to <2 x i64>
; %r10 = <INT64_MAX, INT64_MAX> (every bit except the sign bit);
; %r11 = <INT64_MIN, INT64_MIN> (only the sign bit). Both are built lane by
; lane starting from a poison vector.
%r8 = insertelement <2 x i64> poison, i64 9223372036854775807, i32 0
%r9 = insertelement <2 x i64> poison, i64 -9223372036854775808, i32 0
%r10 = insertelement <2 x i64> %r8, i64 9223372036854775807, i32 1
%r11 = insertelement <2 x i64> %r9, i64 -9223372036854775808, i32 1
; Keep the non-sign bits of the loaded value and the sign bits of the zero
; vector, then recombine.
%r12 = and <2 x i64> %r6, %r10
%r13 = and <2 x i64> %r7, %r11
%r14 = or <2 x i64> %r12, %r13
%r15 = bitcast <2 x i64> %r14 to <2 x double>
%r18 = add i64 %s0, %A
%r19 = inttoptr i64 %r18 to <2 x double>*
store <2 x double> %r15, <2 x double>* %r19, align 8
%r21 = add i64 16, %s0
%r23 = add i64 1, %l0
; Continue looping while the incremented counter is still negative.
%r25 = icmp slt i64 %r23, 0
%r26 = zext i1 %r25 to i64
%r27 = icmp ne i64 %r26, 0
br i1 %r27, label %bb30, label %bb5
bb5:
ret void
}

View File

@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Tests to verify proper functioning of the icmp folding implemented in
; InstCombiner::foldICmpBitCastConstant
; Specifically, folding:
; icmp <pred> iN X, C
; where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN
; and C is a splat of a K-bit pattern
; and SC is a constant vector = <C', C', C', ..., C'>
; Into:
; %E = extractelement <M x iK> %vec, i32 C'
; icmp <pred> iK %E, trunc(C)
; %val is inserted into lane 0 of a poison <4 x i1>, splatted from lane 0,
; bitcast to i4 and compared for equality with 0. The checks expect this to
; fold to a single `xor i1 %val, true`.
define i1 @test_i1_0(i1 %val) {
; CHECK-LABEL: @test_i1_0(
; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 0
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, 0
ret i1 %cond
}
; Same comparison against 0 as @test_i1_0, but %val is inserted into lane 2
; and the splat mask selects lane 2. The checks expect the same
; `xor i1 %val, true` result.
define i1 @test_i1_0_2(i1 %val) {
; CHECK-LABEL: @test_i1_0_2(
; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 2
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, 0
ret i1 %cond
}
; Splat of %val across <4 x i1>, bitcast to i4 and compared for equality with
; -1 (all bits set). The checks expect the whole sequence to fold to
; returning %val itself.
define i1 @test_i1_m1(i1 %val) {
; CHECK-LABEL: @test_i1_m1(
; CHECK-NEXT: ret i1 [[VAL:%.*]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 0
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, -1
ret i1 %cond
}
; Splat of %val across <4 x i8>, bitcast to i32 and compared for equality with
; 1212696648 (0x48484848, i.e. the byte 72 repeated four times). The checks
; expect a scalar `icmp eq i8 %val, 72`.
define i1 @test_i8_pattern(i8 %val) {
; CHECK-LABEL: @test_i8_pattern(
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}
; Same comparison as @test_i8_pattern, but %val is inserted into lane 2 and
; the splat mask selects lane 2. The checks expect the same
; `icmp eq i8 %val, 72`.
define i1 @test_i8_pattern_2(i8 %val) {
; CHECK-LABEL: @test_i8_pattern_2(
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 2
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}
; Make sure we don't try to fold if the shufflemask has differing element values
; The mask <1, 0, 3, 2> permutes lanes instead of splatting one lane; the
; checks expect the shuffle, bitcast and i32 compare to be left as written.
define i1 @test_i8_pattern_3(<4 x i8> %invec) {
; CHECK-LABEL: @test_i8_pattern_3(
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648
; CHECK-NEXT: ret i1 [[COND]]
;
%vec = shufflevector <4 x i8> %invec, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}
; Make sure we don't try to fold if the compared-to constant isn't a splatted value
; 1212696647 is 0x48484847: the low byte differs from the other three, so the
; constant is not a repeated 8-bit pattern. The checks expect the IR to be
; left as written.
define i1 @test_i8_nopattern(i8 %val) {
; CHECK-LABEL: @test_i8_nopattern(
; CHECK-NEXT: [[INSVEC:%.*]] = insertelement <4 x i8> poison, i8 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696647
ret i1 %cond
}
; Verify that we fold more than just the eq predicate
; Same splat and 0x48484848 constant as @test_i8_pattern, but with an unsigned
; less-than compare; the checks expect `icmp ult i8 %val, 72`.
define i1 @test_i8_ult_pattern(i8 %val) {
; CHECK-LABEL: @test_i8_ult_pattern(
; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp ult i32 %cast, 1212696648
ret i1 %cond
}
; The shuffle splats lane 0 of a <2 x i9> into a wider <3 x i9> result, which
; is bitcast to i27 and compared (signed less-than) with 262657, i.e. the
; 9-bit value 1 repeated three times. The checks expect an extract of lane 0
; followed by `icmp slt i9 ..., 1`.
define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) {
; CHECK-LABEL: @extending_shuffle_with_weird_types(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i9> [[V:%.*]], i32 0
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer
%cast = bitcast <3 x i9> %splat to i27
%cmp = icmp slt i27 %cast, 262657 ; 0x040201
ret i1 %cmp
}

View File

@ -0,0 +1,635 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine %s | FileCheck %s
; add: %x is inserted into lane 0 of a poison vector; the constant operand is
; undef in lane 1, the lane the insert does not write. The checks expect the
; insert and the add to be left as written.
define <2 x i8> @add_constant(i8 %x) {
; CHECK-LABEL: @add_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 42, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = add <2 x i8> %ins, <i8 42, i8 undef>
ret <2 x i8> %bo
}
; add: same as @add_constant except that both lanes of the constant operand
; are defined. The checks expect the IR to be left as written.
define <2 x i8> @add_constant_not_undef_lane(i8 %x) {
; CHECK-LABEL: @add_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 42, i8 -42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = add <2 x i8> %ins, <i8 42, i8 -42>
ret <2 x i8> %bo
}
; IR flags are not required, but they should propagate.
; sub with the constant as operand 0: %x is inserted into lane 1 and the
; constant is undef in lane 0. The checks expect the sub to keep both the nuw
; and nsw flags.
define <2 x i8> @sub_constant_op0(i8 %x) {
; CHECK-LABEL: @sub_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <2 x i8> <i8 undef, i8 -42>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = sub nsw nuw <2 x i8> <i8 undef, i8 -42>, %ins
ret <2 x i8> %bo
}
; sub with the constant as operand 0 and both constant lanes defined; %x is
; inserted into lane 1. The checks expect the IR, including the nuw flag, to
; be left as written.
define <2 x i8> @sub_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @sub_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = sub nuw <2 x i8> <i8 42, i8 -42>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = sub nuw <2 x i8> <i8 42, i8 -42>, %ins
ret <2 x i8> %bo
}
; sub with the constant as operand 1: %x is inserted into lane 0 and the
; constant is undef in lane 1. The checks expect the `sub nuw` to be rewritten
; as an add of the negated constant <-42, undef>, without the nuw flag.
define <2 x i8> @sub_constant_op1(i8 %x) {
; CHECK-LABEL: @sub_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 -42, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = sub nuw <2 x i8> %ins, <i8 42, i8 undef>
ret <2 x i8> %bo
}
; sub with a fully defined constant as operand 1; %x is inserted into lane 0.
; The checks expect the `sub nuw` to be rewritten as an add of the negated
; constant <-42, 42>, without the nuw flag.
define <2 x i8> @sub_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @sub_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 -42, i8 42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = sub nuw <2 x i8> %ins, <i8 42, i8 -42>
ret <2 x i8> %bo
}
; mul on a 3-element vector: %x is inserted into lane 2 and the constant is
; undef in lanes 0 and 1. The checks expect the IR to be left as written.
define <3 x i8> @mul_constant(i8 %x) {
; CHECK-LABEL: @mul_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> poison, i8 [[X:%.*]], i32 2
; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[INS]], <i8 undef, i8 undef, i8 -42>
; CHECK-NEXT: ret <3 x i8> [[BO]]
;
%ins = insertelement <3 x i8> poison, i8 %x, i32 2
%bo = mul <3 x i8> %ins, <i8 undef, i8 undef, i8 -42>
ret <3 x i8> %bo
}
; mul on a 3-element vector: %x is inserted into lane 2; the constant defines
; lane 0 (42) while lane 1 is still undef. The checks expect the IR to be left
; as written.
define <3 x i8> @mul_constant_not_undef_lane(i8 %x) {
; CHECK-LABEL: @mul_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> poison, i8 [[X:%.*]], i32 2
; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[INS]], <i8 42, i8 undef, i8 -42>
; CHECK-NEXT: ret <3 x i8> [[BO]]
;
%ins = insertelement <3 x i8> poison, i8 %x, i32 2
%bo = mul <3 x i8> %ins, <i8 42, i8 undef, i8 -42>
ret <3 x i8> %bo
}
; shl with the constant as the shifted value (operand 0): %x is inserted into
; lane 1 as the shift amount and the constant is undef in lane 0. The checks
; expect the IR to be left as written.
define <2 x i8> @shl_constant_op0(i8 %x) {
; CHECK-LABEL: @shl_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i8> <i8 undef, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = shl <2 x i8> <i8 undef, i8 2>, %ins
ret <2 x i8> %bo
}
; shl with a fully defined constant as the shifted value (operand 0); %x is
; inserted into lane 1 as the shift amount. The checks expect the IR to be
; left as written.
define <2 x i8> @shl_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @shl_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = shl <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; shl with the constant as the shift amount (operand 1): %x is inserted into
; lane 0 and the constant is undef in lane 1. The checks expect the IR,
; including the nuw flag, to be left as written.
define <2 x i8> @shl_constant_op1(i8 %x) {
; CHECK-LABEL: @shl_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i8> [[INS]], <i8 5, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = shl nuw <2 x i8> %ins, <i8 5, i8 undef>
ret <2 x i8> %bo
}
; shl with a fully defined constant as the shift amount (operand 1); %x is
; inserted into lane 0. The checks expect the IR, including the nuw flag, to
; be left as written.
define <2 x i8> @shl_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @shl_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = shl nuw <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; ashr with the constant as the shifted value (operand 0): %x is inserted into
; lane 1 and the constant is undef in lane 0. The checks expect the
; `ashr exact` to be left as written.
define <2 x i8> @ashr_constant_op0(i8 %x) {
; CHECK-LABEL: @ashr_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i8> <i8 undef, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = ashr exact <2 x i8> <i8 undef, i8 2>, %ins
ret <2 x i8> %bo
}
; ashr with a fully defined, non-negative constant <5, 2> as the shifted value
; (operand 0); %x is inserted into lane 1. The checks expect the `ashr exact`
; to be rewritten as a plain lshr (no exact flag).
define <2 x i8> @ashr_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @ashr_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = ashr exact <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; ashr with the constant as the shift amount (operand 1): %x is inserted into
; lane 0 and the constant is undef in lane 1. The checks expect the IR to be
; left as written.
define <2 x i8> @ashr_constant_op1(i8 %x) {
; CHECK-LABEL: @ashr_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i8> [[INS]], <i8 5, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = ashr <2 x i8> %ins, <i8 5, i8 undef>
ret <2 x i8> %bo
}
; ashr with a fully defined constant as the shift amount (operand 1); %x is
; inserted into lane 0. The checks expect the IR to be left as written.
define <2 x i8> @ashr_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @ashr_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = ashr <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; lshr with the constant as the shifted value (operand 0): %x is inserted into
; lane 0 and the constant is undef in lane 1. The checks expect the IR to be
; left as written.
define <2 x i8> @lshr_constant_op0(i8 %x) {
; CHECK-LABEL: @lshr_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 undef>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = lshr <2 x i8> <i8 5, i8 undef>, %ins
ret <2 x i8> %bo
}
; lshr with a fully defined constant as the shifted value (operand 0); %x is
; inserted into lane 0. The checks expect the IR to be left as written.
define <2 x i8> @lshr_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @lshr_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = lshr <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; lshr with the constant as the shift amount (operand 1): %x is inserted into
; lane 1 and the constant is undef in lane 0. The checks expect the
; `lshr exact` to be left as written.
define <2 x i8> @lshr_constant_op1(i8 %x) {
; CHECK-LABEL: @lshr_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i8> [[INS]], <i8 undef, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = lshr exact <2 x i8> %ins, <i8 undef, i8 2>
ret <2 x i8> %bo
}
; lshr with a fully defined constant as the shift amount (operand 1); %x is
; inserted into lane 1. The checks expect the `lshr exact` to be left as
; written.
define <2 x i8> @lshr_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @lshr_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = lshr exact <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; urem with the constant as the dividend (operand 0): %x is inserted into
; lane 0 of the divisor and the constant is undef in lane 1. The checks expect
; the IR to be left as written.
define <2 x i8> @urem_constant_op0(i8 %x) {
; CHECK-LABEL: @urem_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> <i8 5, i8 undef>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = urem <2 x i8> <i8 5, i8 undef>, %ins
ret <2 x i8> %bo
}
; urem with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 0 of the divisor. The checks expect the IR to be left as
; written.
define <2 x i8> @urem_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @urem_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = urem <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; urem with the constant as the divisor (operand 1), where the divisor has an
; undef lane (lane 0). The checks expect the whole function to fold to
; returning an undef vector.
define <2 x i8> @urem_constant_op1(i8 %x) {
; CHECK-LABEL: @urem_constant_op1(
; CHECK-NEXT: ret <2 x i8> undef
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = urem <2 x i8> %ins, <i8 undef, i8 2>
ret <2 x i8> %bo
}
; urem with a fully defined constant divisor (operand 1); %x is inserted into
; lane 1 of the dividend. The checks expect the IR to be left as written.
define <2 x i8> @urem_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @urem_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = urem <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; srem with the constant as the dividend (operand 0): %x is inserted into
; lane 0 of the divisor and the constant is undef in lane 1. The checks expect
; the IR to be left as written.
define <2 x i8> @srem_constant_op0(i8 %x) {
; CHECK-LABEL: @srem_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> <i8 5, i8 undef>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = srem <2 x i8> <i8 5, i8 undef>, %ins
ret <2 x i8> %bo
}
; srem with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 0 of the divisor. The checks expect the IR to be left as
; written.
define <2 x i8> @srem_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @srem_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = srem <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; srem with the constant as the divisor (operand 1), where the divisor has an
; undef lane (lane 0). The checks expect the whole function to fold to
; returning an undef vector.
define <2 x i8> @srem_constant_op1(i8 %x) {
; CHECK-LABEL: @srem_constant_op1(
; CHECK-NEXT: ret <2 x i8> undef
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = srem <2 x i8> %ins, <i8 undef, i8 2>
ret <2 x i8> %bo
}
; srem with a fully defined constant divisor (operand 1); %x is inserted into
; lane 1 of the dividend. The checks expect the IR to be left as written.
define <2 x i8> @srem_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @srem_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = srem <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; udiv with the constant as the dividend (operand 0): %x is inserted into
; lane 0 of the divisor and the constant is undef in lane 1. The checks expect
; the `udiv exact` to be left as written.
define <2 x i8> @udiv_constant_op0(i8 %x) {
; CHECK-LABEL: @udiv_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i8> <i8 5, i8 undef>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = udiv exact <2 x i8> <i8 5, i8 undef>, %ins
ret <2 x i8> %bo
}
; udiv with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 0 of the divisor. The checks expect the `udiv exact` to
; be left as written.
define <2 x i8> @udiv_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @udiv_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = udiv exact <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; udiv with the constant as the divisor (operand 1), where the divisor has an
; undef lane (lane 0). The checks expect the whole function to fold to
; returning an undef vector.
define <2 x i8> @udiv_constant_op1(i8 %x) {
; CHECK-LABEL: @udiv_constant_op1(
; CHECK-NEXT: ret <2 x i8> undef
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = udiv <2 x i8> %ins, <i8 undef, i8 2>
ret <2 x i8> %bo
}
; udiv with a fully defined constant divisor (operand 1); %x is inserted into
; lane 1 of the dividend. The checks expect the IR to be left as written.
define <2 x i8> @udiv_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @udiv_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = udiv <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; sdiv with the constant as the dividend (operand 0): %x is inserted into
; lane 0 of the divisor and the constant is undef in lane 1. The checks expect
; the IR to be left as written.
define <2 x i8> @sdiv_constant_op0(i8 %x) {
; CHECK-LABEL: @sdiv_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i8> <i8 5, i8 undef>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = sdiv <2 x i8> <i8 5, i8 undef>, %ins
ret <2 x i8> %bo
}
; sdiv with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 0 of the divisor. The checks expect the IR to be left as
; written.
define <2 x i8> @sdiv_constant_op0_not_undef_lane(i8 %x) {
; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i8> <i8 5, i8 2>, [[INS]]
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = sdiv <2 x i8> <i8 5, i8 2>, %ins
ret <2 x i8> %bo
}
; sdiv with the constant as the divisor (operand 1), where the divisor has an
; undef lane (lane 0). The checks expect the whole function to fold to
; returning an undef vector.
define <2 x i8> @sdiv_constant_op1(i8 %x) {
; CHECK-LABEL: @sdiv_constant_op1(
; CHECK-NEXT: ret <2 x i8> undef
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = sdiv exact <2 x i8> %ins, <i8 undef, i8 2>
ret <2 x i8> %bo
}
; sdiv with a fully defined constant divisor (operand 1); %x is inserted into
; lane 1 of the dividend. The checks expect the `sdiv exact` to be left as
; written.
define <2 x i8> @sdiv_constant_op1_not_undef_lane(i8 %x) {
; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i8> [[INS]], <i8 5, i8 2>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = sdiv exact <2 x i8> %ins, <i8 5, i8 2>
ret <2 x i8> %bo
}
; and: %x is inserted into lane 0 of a poison vector and the constant operand
; is undef in lane 1. The checks expect the IR to be left as written.
define <2 x i8> @and_constant(i8 %x) {
; CHECK-LABEL: @and_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = and <2 x i8> [[INS]], <i8 42, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = and <2 x i8> %ins, <i8 42, i8 undef>
ret <2 x i8> %bo
}
; and: same as @and_constant except that both lanes of the constant operand
; are defined. The checks expect the IR to be left as written.
define <2 x i8> @and_constant_not_undef_lane(i8 %x) {
; CHECK-LABEL: @and_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = and <2 x i8> [[INS]], <i8 42, i8 -42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = and <2 x i8> %ins, <i8 42, i8 -42>
ret <2 x i8> %bo
}
; or: %x is inserted into lane 1 of a poison vector and the constant operand
; is undef in lane 0. The checks expect the IR to be left as written.
define <2 x i8> @or_constant(i8 %x) {
; CHECK-LABEL: @or_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = or <2 x i8> [[INS]], <i8 undef, i8 -42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = or <2 x i8> %ins, <i8 undef, i8 -42>
ret <2 x i8> %bo
}
; or: same as @or_constant except that both lanes of the constant operand are
; defined. The checks expect the IR to be left as written.
define <2 x i8> @or_constant_not_undef_lane(i8 %x) {
; CHECK-LABEL: @or_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = or <2 x i8> [[INS]], <i8 42, i8 -42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 1
%bo = or <2 x i8> %ins, <i8 42, i8 -42>
ret <2 x i8> %bo
}
; xor: %x is inserted into lane 0 of a poison vector and the constant operand
; is undef in lane 1. The checks expect the IR to be left as written.
define <2 x i8> @xor_constant(i8 %x) {
; CHECK-LABEL: @xor_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = xor <2 x i8> [[INS]], <i8 42, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = xor <2 x i8> %ins, <i8 42, i8 undef>
ret <2 x i8> %bo
}
; xor: same as @xor_constant except that both lanes of the constant operand
; are defined. The checks expect the IR to be left as written.
define <2 x i8> @xor_constant_not_undef_lane(i8 %x) {
; CHECK-LABEL: @xor_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = xor <2 x i8> [[INS]], <i8 42, i8 -42>
; CHECK-NEXT: ret <2 x i8> [[BO]]
;
%ins = insertelement <2 x i8> poison, i8 %x, i32 0
%bo = xor <2 x i8> %ins, <i8 42, i8 -42>
ret <2 x i8> %bo
}
; fadd: %x is inserted into lane 0 of a poison vector and the constant operand
; is undef in lane 1. The checks expect the IR to be left as written.
define <2 x float> @fadd_constant(float %x) {
; CHECK-LABEL: @fadd_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float 4.200000e+01, float undef>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fadd <2 x float> %ins, <float 42.0, float undef>
ret <2 x float> %bo
}
; fadd with a fully defined constant operand; %x is inserted into lane 1 of a
; poison vector. The checks expect the IR to be left as written.
define <2 x float> @fadd_constant_not_undef_lane(float %x) {
; CHECK-LABEL: @fadd_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = fadd <2 x float> %ins, <float 42.0, float -42.0>
ret <2 x float> %bo
}
; fsub with the constant as operand 0: %x is inserted into lane 0 and the
; constant is undef in lane 1. The checks expect the `fsub fast` to be left as
; written.
define <2 x float> @fsub_constant_op0(float %x) {
; CHECK-LABEL: @fsub_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x float> <float 4.200000e+01, float undef>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fsub fast <2 x float> <float 42.0, float undef>, %ins
ret <2 x float> %bo
}
; fsub with a fully defined constant as operand 0; %x is inserted into lane 1.
; The checks expect the `fsub nsz` to be left as written.
define <2 x float> @fsub_constant_op0_not_undef_lane(float %x) {
; CHECK-LABEL: @fsub_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = fsub nsz <2 x float> <float 42.0, float -42.0>, %ins
ret <2 x float> %bo
}
; fsub with the constant as operand 1: %x is inserted into lane 1 and the
; constant is undef in lane 0. The checks expect the fsub to be rewritten as
; an fadd of the negated constant <undef, -42.0>.
define <2 x float> @fsub_constant_op1(float %x) {
; CHECK-LABEL: @fsub_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float undef, float -4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = fsub <2 x float> %ins, <float undef, float 42.0>
ret <2 x float> %bo
}
; fsub with a fully defined constant as operand 1; %x is inserted into lane 0.
; The checks expect the fsub to be rewritten as an fadd of the negated
; constant <-42.0, 42.0>.
define <2 x float> @fsub_constant_op1_not_undef_lane(float %x) {
; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float -4.200000e+01, float 4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fsub <2 x float> %ins, <float 42.0, float -42.0>
ret <2 x float> %bo
}
; fmul: %x is inserted into lane 0 of a poison vector and the constant operand
; is undef in lane 1. The checks expect the `fmul reassoc` to be left as
; written.
define <2 x float> @fmul_constant(float %x) {
; CHECK-LABEL: @fmul_constant(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x float> [[INS]], <float 4.200000e+01, float undef>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fmul reassoc <2 x float> %ins, <float 42.0, float undef>
ret <2 x float> %bo
}
; fmul with a fully defined constant operand; %x is inserted into lane 1 of a
; poison vector. The checks expect the IR to be left as written.
define <2 x float> @fmul_constant_not_undef_lane(float %x) {
; CHECK-LABEL: @fmul_constant_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = fmul <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = fmul <2 x float> %ins, <float 42.0, float -42.0>
ret <2 x float> %bo
}
; fdiv with the constant as the dividend (operand 0): %x is inserted into
; lane 1 of the divisor and the constant is undef in lane 0. The checks expect
; the `fdiv nnan` to be left as written.
define <2 x float> @fdiv_constant_op0(float %x) {
; CHECK-LABEL: @fdiv_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x float> <float undef, float 4.200000e+01>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = fdiv nnan <2 x float> <float undef, float 42.0>, %ins
ret <2 x float> %bo
}
; fdiv with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 0 of the divisor. The checks expect the `fdiv ninf` to be
; left as written.
define <2 x float> @fdiv_constant_op0_not_undef_lane(float %x) {
; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fdiv ninf <2 x float> <float 42.0, float -42.0>, %ins
ret <2 x float> %bo
}
; fdiv with the constant as the divisor (operand 1): %x is inserted into
; lane 0 of the dividend and the constant is undef in lane 1. Unlike the
; integer division tests in this file, the checks expect the IR to be left as
; written.
define <2 x float> @fdiv_constant_op1(float %x) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[INS]], <float 4.200000e+01, float undef>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fdiv <2 x float> %ins, <float 42.0, float undef>
ret <2 x float> %bo
}
; fdiv with a fully defined constant divisor (operand 1); %x is inserted into
; lane 0 of the dividend. The checks expect the IR to be left as written.
define <2 x float> @fdiv_constant_op1_not_undef_lane(float %x) {
; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = fdiv <2 x float> %ins, <float 42.0, float -42.0>
ret <2 x float> %bo
}
; frem with the constant as the dividend (operand 0): %x is inserted into
; lane 0 of the divisor and the constant is undef in lane 1. The checks expect
; the `frem fast` to be left as written.
define <2 x float> @frem_constant_op0(float %x) {
; CHECK-LABEL: @frem_constant_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x float> <float 4.200000e+01, float undef>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = frem fast <2 x float> <float 42.0, float undef>, %ins
ret <2 x float> %bo
}
; frem with a fully defined constant as the dividend (operand 0); %x is
; inserted into lane 1 of the divisor. The checks expect the IR to be left as
; written.
define <2 x float> @frem_constant_op0_not_undef_lane(float %x) {
; CHECK-LABEL: @frem_constant_op0_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = frem <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = frem <2 x float> <float 42.0, float -42.0>, %ins
ret <2 x float> %bo
}
; frem with the constant as the divisor (operand 1): %x is inserted into
; lane 1 of the dividend and the constant is undef in lane 0. The checks
; expect the `frem ninf` to be left as written.
define <2 x float> @frem_constant_op1(float %x) {
; CHECK-LABEL: @frem_constant_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 1
; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x float> [[INS]], <float undef, float 4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 1
%bo = frem ninf <2 x float> %ins, <float undef, float 42.0>
ret <2 x float> %bo
}
; frem with a fully defined constant divisor (operand 1); %x is inserted into
; lane 0 of the dividend. The checks expect the `frem nnan` to be left as
; written.
define <2 x float> @frem_constant_op1_not_undef_lane(float %x) {
; CHECK-LABEL: @frem_constant_op1_not_undef_lane(
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[BO]]
;
%ins = insertelement <2 x float> poison, float %x, i32 0
%bo = frem nnan <2 x float> %ins, <float 42.0, float -42.0>
ret <2 x float> %bo
}

View File

@ -0,0 +1,735 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine %s | FileCheck %s
; Extracts lane 5 of an 8-lane vector and inserts it into a 1-lane poison
; vector. The expected output is a single shufflevector that selects lane 5.
define <1 x i8> @test1(<8 x i8> %in) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <8 x i8> [[IN:%.*]], <8 x i8> undef, <1 x i32> <i32 5>
; CHECK-NEXT: ret <1 x i8> [[VEC]]
;
%val = extractelement <8 x i8> %in, i32 5
%vec = insertelement <1 x i8> poison, i8 %val, i32 0
ret <1 x i8> %vec
}
; Builds a 4-lane vector from lanes 3, 1 and 2 of %in plus lane 0 of %in2 using
; an insertelement chain that starts from poison. The expected output is a
; single two-source shufflevector (operands %in2, %in) with mask <11, 9, 0, 10>.
define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[VEC_3:%.*]] = shufflevector <8 x i16> [[IN2:%.*]], <8 x i16> [[IN:%.*]], <4 x i32> <i32 11, i32 9, i32 0, i32 10>
; CHECK-NEXT: ret <4 x i16> [[VEC_3]]
;
%elt0 = extractelement <8 x i16> %in, i32 3
%elt1 = extractelement <8 x i16> %in, i32 1
%elt2 = extractelement <8 x i16> %in2, i32 0
%elt3 = extractelement <8 x i16> %in, i32 2
%vec.0 = insertelement <4 x i16> poison, i16 %elt0, i32 0
%vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
%vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
%vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
ret <4 x i16> %vec.3
}
; Inserts the only lane of the 1-lane vector %b into lane 1 of %a. The expected
; output first widens %b to 2 lanes with a shuffle, then blends lane 0 of %a
; with lane 0 of the widened value.
define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: @test_vcopyq_lane_p64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
%elt = extractelement <1 x i64> %b, i32 0
%res = insertelement <2 x i64> %a, i64 %elt, i32 1
ret <2 x i64> %res
}
; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109
; Both lanes of the 2-lane %ext are inserted into lanes 1 and 3 of %ins. The
; expected output widens %ext to 4 lanes with one shuffle and blends it into
; %ins with a second shuffle (mask <0, 4, 2, 5>).
define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
; CHECK-LABEL: @widen_extract2(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I2:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
; CHECK-NEXT: ret <4 x float> [[I2]]
;
%e1 = extractelement <2 x float> %ext, i32 0
%e2 = extractelement <2 x float> %ext, i32 1
%i1 = insertelement <4 x float> %ins, float %e1, i32 1
%i2 = insertelement <4 x float> %i1, float %e2, i32 3
ret <4 x float> %i2
}
; All three lanes of %ext are inserted into lanes 2, 1 and 0 of %ins (reversed
; order). The expected output widens %ext to 4 lanes and blends it into %ins
; with mask <6, 5, 4, 3>.
define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
; CHECK-LABEL: @widen_extract3(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[I3:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float> [[I3]]
;
%e1 = extractelement <3 x float> %ext, i32 0
%e2 = extractelement <3 x float> %ext, i32 1
%e3 = extractelement <3 x float> %ext, i32 2
%i1 = insertelement <4 x float> %ins, float %e1, i32 2
%i2 = insertelement <4 x float> %i1, float %e2, i32 1
%i3 = insertelement <4 x float> %i2, float %e3, i32 0
ret <4 x float> %i3
}
; Lane 0 of the 2-lane %ext is inserted into lane 2 of the 8-lane %ins. The
; expected output widens %ext to 8 lanes and blends it into %ins with a second
; shuffle that takes only result lane 2 from the widened value.
define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
; CHECK-LABEL: @widen_extract4(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[I1:%.*]] = shufflevector <8 x float> [[INS:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x float> [[I1]]
;
%e1 = extractelement <2 x float> %ext, i32 0
%i1 = insertelement <8 x float> %ins, float %e1, i32 2
ret <8 x float> %i1
}
; PR26015: https://llvm.org/bugs/show_bug.cgi?id=26015
; The widening shuffle must be inserted before any uses.
; Lanes 2 and 3 of %t0 are inserted into lanes 3 and 7 of a zero vector. The
; expected output is one widening shuffle of %t0 followed by one blend shuffle
; against a constant vector.
define <8 x i16> @pr26015(<4 x i16> %t0) {
; CHECK-LABEL: @pr26015(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
;
%t1 = extractelement <4 x i16> %t0, i32 2
%t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
%t3 = insertelement <8 x i16> %t2, i16 0, i32 6
%t4 = extractelement <4 x i16> %t0, i32 3
%t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
ret <8 x i16> %t5
}
; PR25999: https://llvm.org/bugs/show_bug.cgi?id=25999
; TODO: The widening shuffle could be inserted at the start of the function to allow the first extract to use it.
; %t1 is extracted in the entry block and used in both successors. In the
; expected output %t1 remains an extractelement that is inserted normally, and
; only the lane-3 extract inside 'if' is replaced by a widening shuffle + blend.
define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
; CHECK-LABEL: @pr25999(
; CHECK-NEXT: [[T1:%.*]] = extractelement <4 x i16> [[T0:%.*]], i32 2
; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x i16> [[T5]]
; CHECK: end:
; CHECK-NEXT: [[A1:%.*]] = add i16 [[T1]], 4
; CHECK-NEXT: [[T6:%.*]] = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 [[A1]], i32 0
; CHECK-NEXT: ret <8 x i16> [[T6]]
;
%t1 = extractelement <4 x i16> %t0, i32 2
br i1 %b, label %if, label %end
if:
%t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
%t3 = insertelement <8 x i16> %t2, i16 0, i32 6
%t4 = extractelement <4 x i16> %t0, i32 3
%t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
ret <8 x i16> %t5
end:
%a1 = add i16 %t1, 4
%t6 = insertelement <8 x i16> zeroinitializer, i16 %a1, i32 0
ret <8 x i16> %t6
}
; The widening shuffle must be inserted at a valid point (after the PHIs).
; The vector being extracted from (%tmp1) is itself a phi in bb3. In the
; expected output the widening shuffle appears after both phis of bb3.
define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @pr25999_phis1(
; CHECK-NEXT: bb1:
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> [[A:%.*]])
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x double> [[TMP4]]
;
bb1:
br i1 %c, label %bb2, label %bb3
bb2:
%r = call <2 x double> @dummy(<2 x double> %a)
br label %bb3
bb3:
%tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
%tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
%tmp3 = extractelement <2 x double> %tmp1, i32 0
%tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
ret <4 x double> %tmp4
}
; External function with no body in this file; the pr25999_phis tests call it
; in bb2 to produce one incoming value of a phi.
declare <2 x double> @dummy(<2 x double>)
; The extract reads %d, an fadd of the phi %tmp1, rather than the phi itself.
; In the expected output the widening shuffle of %d appears after the fadd
; (and therefore after both phis of bb3).
define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @pr25999_phis2(
; CHECK-NEXT: bb1:
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> [[A:%.*]])
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ]
; CHECK-NEXT: [[D:%.*]] = fadd <2 x double> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x double> [[TMP4]]
;
bb1:
br i1 %c, label %bb2, label %bb3
bb2:
%r = call <2 x double> @dummy(<2 x double> %a)
br label %bb3
bb3:
%tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
%tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
%d = fadd <2 x double> %tmp1, %tmp1
%tmp3 = extractelement <2 x double> %d, i32 0
%tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
ret <4 x double> %tmp4
}
; PR26354: https://llvm.org/bugs/show_bug.cgi?id=26354
; Don't create a shufflevector if we know that we're not going to replace the insertelement.
; %e2 is inserted into a zero vector only on the 'if' path, and the result
; feeds a phi. The expected output contains no shufflevector: the insert stays
; an insertelement, with the %e2 extract placed in 'if' and the %e1 extract
; placed in 'end'.
define double @pr26354(<2 x double>* %tmp, i1 %B) {
; CHECK-LABEL: @pr26354(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16
; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, double [[E2]], i32 3
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ]
; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]]
; CHECK-NEXT: ret double [[MU]]
;
entry:
%ld = load <2 x double>, <2 x double>* %tmp
%e1 = extractelement <2 x double> %ld, i32 0
%e2 = extractelement <2 x double> %ld, i32 1
br i1 %B, label %if, label %end
if:
%i1 = insertelement <4 x double> zeroinitializer, double %e2, i32 3
br label %end
end:
%ph = phi <4 x double> [ undef, %entry ], [ %i1, %if ]
%e3 = extractelement <4 x double> %ph, i32 1
%mu = fmul double %e1, %e3
ret double %mu
}
; https://llvm.org/bugs/show_bug.cgi?id=30923
; Delete the widening shuffle if we're not going to reduce the extract/insert to a shuffle.
; The input already contains a widening shuffle (%widen) feeding %ext2. In the
; expected output that shuffle is gone, %ext2 extracts directly from %x, and
; both inserts remain insertelement instructions.
define <4 x float> @PR30923(<2 x float> %x) {
; CHECK-LABEL: @PR30923(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[EXT1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; CHECK-NEXT: store float [[EXT1]], float* undef, align 4
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[EXT2:%.*]] = extractelement <2 x float> [[X]], i32 0
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, float [[EXT2]], i32 2
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[EXT1]], i32 3
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
bb1:
%ext1 = extractelement <2 x float> %x, i32 1
store float %ext1, float* undef, align 4
br label %bb2
bb2:
%widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%ext2 = extractelement <4 x float> %widen, i32 0
%ins1 = insertelement <4 x float> <float 0.0, float 0.0, float undef, float undef>, float %ext2, i32 2
%ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
ret <4 x float> %ins2
}
; Don't insert extractelements from the wider vector before the def of the index operand.
; %d extracts from %x using a variable index %c (= %y + 3). In the expected
; output that extract reads the widened vector instead, and it still appears
; after the add that defines its index.
define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: @extractelt_insertion(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[RET]]
;
entry:
%a = extractelement <2 x i32> %x, i32 1
%b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
%c = add i32 %y, 3
%d = extractelement <2 x i32> %x, i32 %c
%e = icmp eq i32 %d, 0
%ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; PR34724: https://bugs.llvm.org/show_bug.cgi?id=34724
; Lanes 0 and 1 of %x plus the scalar %y are inserted into lanes 1, 2 and 3 of
; a poison vector. The expected output keeps the extract/insert chain exactly
; as written (no shufflevector is formed).
define <4 x float> @collectShuffleElts(<2 x float> %x, float %y) {
; CHECK-LABEL: @collectShuffleElts(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 1
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[X1]], i32 2
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[Y:%.*]], i32 3
; CHECK-NEXT: ret <4 x float> [[V3]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%v1 = insertelement <4 x float> poison, float %x0, i32 1
%v2 = insertelement <4 x float> %v1, float %x1, i32 2
%v3 = insertelement <4 x float> %v2, float %y, i32 3
ret <4 x float> %v3
}
; Simplest case - insert scalar into poison, then shuffle that value in place into another vector.
; The expected output is a single insertelement of %x into lane 0 of %y.
define <4 x float> @insert_shuffle(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_shuffle(
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
%r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %r
}
; Insert scalar into some element of a dummy vector, then move it to a different element in another vector.
; Here %x goes into lane 0 of a poison vector and the shuffle places it in
; lane 1 of %y; the expected output is a single insertelement into lane 1.
define <4 x float> @insert_shuffle_translate(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_shuffle_translate(
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
%r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
ret <4 x float> %r
}
; The vector operand of the insert is irrelevant.
; %x is inserted into lane 3 of %q and the shuffle reads only that lane from
; %xv, placing it in lane 2 of %y. %q does not appear in the expected output.
define <4 x float> @insert_not_undef_shuffle_translate(float %x, <4 x float> %y, <4 x float> %q) {
; CHECK-LABEL: @insert_not_undef_shuffle_translate(
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> %q, float %x, i32 3
%r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
ret <4 x float> %r
}
; The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements.
; Mask lane 3 is undef and mask index 6 reads the inserted lane 2 of %xv; the
; expected output is a single insertelement of %x into lane 1 of %y.
define <4 x float> @insert_not_undef_shuffle_translate_commute(float %x, <4 x float> %y, <4 x float> %q) {
; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute(
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> %q, float %x, i32 2
%r = shufflevector <4 x float> %y, <4 x float> %xv, <4 x i32> <i32 0, i32 6, i32 2, i32 undef>
ret <4 x float> %r
}
; Both shuffle operands may be inserts - choose the correct side.
; The mask takes only lane 0 of %xv1 (into result lane 1) and everything else
; from %xv2, so the expected output inserts %x1 into lane 1 of %xv2.
define <4 x float> @insert_insert_shuffle_translate(float %x1, float %x2, <4 x float> %q) {
; CHECK-LABEL: @insert_insert_shuffle_translate(
; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X2:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV2]], float [[X1:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv1 = insertelement <4 x float> %q, float %x1, i32 0
%xv2 = insertelement <4 x float> %q, float %x2, i32 2
%r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
ret <4 x float> %r
}
; Both shuffle operands may be inserts - choose the correct side.
; The mask takes only lane 2 of %xv2 (into result lane 1) and everything else
; from %xv1, so the expected output inserts %x2 into lane 1 of %xv1.
define <4 x float> @insert_insert_shuffle_translate_commute(float %x1, float %x2, <4 x float> %q) {
; CHECK-LABEL: @insert_insert_shuffle_translate_commute(
; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV1]], float [[X2:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv1 = insertelement <4 x float> %q, float %x1, i32 0
%xv2 = insertelement <4 x float> %q, float %x2, i32 2
%r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x float> %r
}
; Negative test - this only works if the shuffle is choosing exactly 1 element from 1 of the inputs.
; TODO: But this could be a special-case because we're inserting into the same base vector.
; The mask <0, 6, 2, 7> takes two lanes from each input; the expected output is
; identical to the input.
define <4 x float> @insert_insert_shuffle_translate_wrong_mask(float %x1, float %x2, <4 x float> %q) {
; CHECK-LABEL: @insert_insert_shuffle_translate_wrong_mask(
; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> [[Q]], float [[X2:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> <i32 0, i32 6, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv1 = insertelement <4 x float> %q, float %x1, i32 0
%xv2 = insertelement <4 x float> %q, float %x2, i32 2
%r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 0, i32 6, i32 2, i32 7>
ret <4 x float> %r
}
; The insert may have other uses.
; @use is an external function with no body in this file; passing a vector to
; it gives that vector an additional use.
declare void @use(<4 x float>)
; %xv is passed to @use as well as to the shuffle. The expected output keeps
; that insert and its call, and replaces the shuffle with a second insert of
; %x into lane 0 of %y.
define <4 x float> @insert_not_undef_shuffle_translate_commute_uses(float %x, <4 x float> %y, <4 x float> %q) {
; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_uses(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X:%.*]], i32 2
; CHECK-NEXT: call void @use(<4 x float> [[XV]])
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X]], i32 0
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> %q, float %x, i32 2
call void @use(<4 x float> %xv)
%r = shufflevector <4 x float> %y, <4 x float> %xv, <4 x i32> <i32 6, i32 undef, i32 2, i32 3>
ret <4 x float> %r
}
; Negative test - size-changing shuffle.
; The shuffle produces 5 lanes from 4-lane inputs. The expected output keeps
; the insert + shuffle pair; the only change is that the insert's base vector
; %q is replaced by undef.
define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) {
; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> <i32 0, i32 6, i32 2, i32 undef, i32 undef>
; CHECK-NEXT: ret <5 x float> [[R]]
;
%xv = insertelement <4 x float> %q, float %x, i32 2
%r = shufflevector <4 x float> %y, <4 x float> %xv, <5 x i32> <i32 0, i32 6, i32 2, i32 undef, i32 undef>
ret <5 x float> %r
}
; %x is inserted at lane 2 of a poison vector and splatted from lane 2 (with
; undef mask lanes). The expected output inserts at lane 0 of an undef vector
; and splats from lane 0 instead, keeping the same undef mask lanes.
define <4 x float> @insert_nonzero_index_splat(float %x) {
; CHECK-LABEL: @insert_nonzero_index_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SPLAT]]
;
%xv = insertelement <4 x float> poison, float %x, i32 2
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 2, i32 undef>
ret <4 x float> %splat
}
; %x is inserted at lane 3 of a 4-lane poison vector and splatted into a
; 3-lane result. The expected output uses a 3-lane insert at lane 0 (into
; undef) and a 3-lane splat from lane 0.
define <3 x double> @insert_nonzero_index_splat_narrow(double %x) {
; CHECK-LABEL: @insert_nonzero_index_splat_narrow(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> undef, <3 x i32> <i32 0, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x double> [[SPLAT]]
;
%xv = insertelement <4 x double> poison, double %x, i32 3
%splat = shufflevector <4 x double> %xv, <4 x double> undef, <3 x i32> <i32 3, i32 undef, i32 3>
ret <3 x double> %splat
}
; %x is inserted at lane 1 of a 4-lane poison vector and splatted into a
; 5-lane result. The expected output uses a 5-lane insert at lane 0 (into
; undef) and a 5-lane splat from lane 0.
define <5 x i7> @insert_nonzero_index_splat_widen(i7 %x) {
; CHECK-LABEL: @insert_nonzero_index_splat_widen(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x i7> undef, i7 [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <5 x i7> [[TMP1]], <5 x i7> undef, <5 x i32> <i32 undef, i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT: ret <5 x i7> [[SPLAT]]
;
%xv = insertelement <4 x i7> poison, i7 %x, i32 1
%splat = shufflevector <4 x i7> %xv, <4 x i7> undef, <5 x i32> <i32 undef, i32 1, i32 1, i32 undef, i32 1>
ret <5 x i7> %splat
}
; Negative test - don't increase instruction count
; %xv is passed to @use in addition to feeding the splat; the expected output
; is identical to the input.
define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) {
; CHECK-LABEL: @insert_nonzero_index_splat_extra_use(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 2
; CHECK-NEXT: call void @use(<4 x float> [[XV]])
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SPLAT]]
;
%xv = insertelement <4 x float> poison, float %x, i32 2
call void @use(<4 x float> %xv)
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 2, i32 undef>
ret <4 x float> %splat
}
; Negative test - non-undef base vector
; %x is inserted into the argument %y rather than into an undef/poison vector,
; and the mask also reads lane 3 of that base. The expected output is
; identical to the input.
define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_nonzero_index_splat_wrong_base(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 3, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SPLAT]]
;
%xv = insertelement <4 x float> %y, float %x, i32 2
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 3, i32 undef>
ret <4 x float> %splat
}
; Negative test - non-constant insert index
; The insert index is the function argument %index; the expected output is
; identical to the input.
define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) {
; CHECK-LABEL: @insert_nonzero_index_splat_wrong_index(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SPLAT]]
;
%xv = insertelement <4 x float> poison, float %x, i32 %index
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
ret <4 x float> %splat
}
; %x is splatted into lanes 1 and 2 and then also inserted at lane 3 of the
; splat. The expected output folds the trailing insert into the splat mask,
; giving one shuffle with mask <undef, 0, 0, 0>.
define <4 x float> @insert_in_splat(float %x) {
; CHECK-LABEL: @insert_in_splat(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
%r = insertelement <4 x float> %splat, float %x, i32 3
ret <4 x float> %r
}
; %x is splatted and then inserted at lane 3 of the splat, while both %xv and
; %splat are also passed to @use. The expected output keeps %xv, %splat and
; both calls, and replaces the trailing insert with a new shuffle of %xv whose
; mask is <undef, 0, 0, 0>.
define <4 x float> @insert_in_splat_extra_uses(float %x) {
; CHECK-LABEL: @insert_in_splat_extra_uses(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: call void @use(<4 x float> [[XV]])
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]])
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
call void @use(<4 x float> %xv)
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
call void @use(<4 x float> %splat)
%r = insertelement <4 x float> %splat, float %x, i32 3
ret <4 x float> %r
}
; Negative test - not a constant index insert
; The trailing insert's index is the function argument %y; the expected output
; is identical to the input.
define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
; CHECK-LABEL: @insert_in_splat_variable_index(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 [[Y:%.*]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
%r = insertelement <4 x float> %splat, float %x, i32 %y
ret <4 x float> %r
}
; Negative test - not a splat shuffle
; Mask lane 2 (index 4) selects lane 0 of the second operand %y, so the
; shuffle is not a splat of %x; the expected output is identical to the input.
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 undef, i32 0, i32 4, i32 undef>
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> poison, float %x, i32 0
%splat = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 undef, i32 0, i32 4, i32 undef>
%r = insertelement <4 x float> %splat, float %x, i32 3
ret <4 x float> %r
}
; Negative test - not a splat shuffle
; %x is inserted into %y, and mask lane 2 selects lane 1 of %xv, which still
; holds an element of %y; the expected output is identical to the input.
define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat2(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> %y, float %x, i32 0
%splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
%r = insertelement <4 x float> %splat, float %x, i32 3
ret <4 x float> %r
}
; %v0 widens %x to 4 lanes (mask <0, 1, undef, undef>); lane 1 of %x is then
; re-inserted at lane 1, where the shuffle already placed it. The expected
; output drops that extract/insert pair and keeps only the widening shuffle
; and the insert of %y.
define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) {
; CHECK-LABEL: @shuf_identity_padding(
; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2
; CHECK-NEXT: ret <4 x i8> [[V2]]
;
%v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%x1 = extractelement <2 x i8> %x, i32 1
%v1 = insertelement <4 x i8> %v0, i8 %x1, i32 1
%v2 = insertelement <4 x i8> %v1, i8 %y, i32 2
ret <4 x i8> %v2
}
; %v0 narrows %x to 3 lanes keeping only lane 0 (mask <0, undef, undef>); lane
; 1 of %x is then inserted at lane 1. The expected output folds that
; extract/insert into the shuffle mask, giving <0, 1, undef>.
define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) {
; CHECK-LABEL: @shuf_identity_extract(
; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[V2:%.*]] = insertelement <3 x i8> [[V1]], i8 [[Y:%.*]], i32 2
; CHECK-NEXT: ret <3 x i8> [[V2]]
;
%v0 = shufflevector <4 x i8> %x, <4 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
%x1 = extractelement <4 x i8> %x, i32 1
%v1 = insertelement <3 x i8> %v0, i8 %x1, i32 1
%v2 = insertelement <3 x i8> %v1, i8 %y, i32 2
ret <3 x i8> %v2
}
; %v0 (mask <0, undef, undef, 3>) is passed to @use, and lane 2 of %x is then
; inserted at lane 2 of %v0. The expected output keeps %v0 for the call and
; creates a second shuffle of %x with mask <0, undef, 2, 3> in place of the
; extract/insert pair.
define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) {
; CHECK-LABEL: @shuf_identity_extract_extra_use(
; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
; CHECK-NEXT: call void @use(<4 x float> [[V0]])
; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> undef, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[Y:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[V2]]
;
%v0 = shufflevector <6 x float> %x, <6 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
call void @use(<4 x float> %v0)
%x1 = extractelement <6 x float> %x, i32 2
%v1 = insertelement <4 x float> %v0, float %x1, i32 2
%v2 = insertelement <4 x float> %v1, float %y, i32 1
ret <4 x float> %v2
}
; Negative test - can't map variable index to shuffle mask.
; Both the extract and the insert use the function argument %index; the
; expected output is identical to the input.
define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %index) {
; CHECK-LABEL: @shuf_identity_padding_variable_index(
; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 [[INDEX]]
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2
; CHECK-NEXT: ret <4 x i8> [[V2]]
;
%v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%x1 = extractelement <2 x i8> %x, i32 %index
%v1 = insertelement <4 x i8> %v0, i8 %x1, i32 %index
%v2 = insertelement <4 x i8> %v1, i8 %y, i32 2
ret <4 x i8> %v2
}
; Negative test - don't create arbitrary shuffle masks.
; The element inserted at lane 1 comes from %other, not from %x. In the
; expected output the extract/insert chain is kept; the only change is that
; mask lane 1 of %v0 (overwritten by the insert) becomes undef.
define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x i8> %other) {
; CHECK-LABEL: @shuf_identity_padding_wrong_source_vec(
; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[OTHER:%.*]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 1
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2
; CHECK-NEXT: ret <4 x i8> [[V2]]
;
%v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%x1 = extractelement <2 x i8> %other, i32 1
%v1 = insertelement <4 x i8> %v0, i8 %x1, i32 1
%v2 = insertelement <4 x i8> %v1, i8 %y, i32 2
ret <4 x i8> %v2
}
; Negative test - don't create arbitrary shuffle masks.
; Lane 1 of %x is inserted at lane 2, which differs from its source position;
; the expected output is identical to the input.
define <4 x i8> @shuf_identity_padding_wrong_index(<2 x i8> %x, i8 %y) {
; CHECK-LABEL: @shuf_identity_padding_wrong_index(
; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 3
; CHECK-NEXT: ret <4 x i8> [[V2]]
;
%v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%x1 = extractelement <2 x i8> %x, i32 1
%v1 = insertelement <4 x i8> %v0, i8 %x1, i32 2
%v2 = insertelement <4 x i8> %v1, i8 %y, i32 3
ret <4 x i8> %v2
}
; The shuffle's first operand is %ins, but its mask <0, 7, 1, 4> never reads
; lane 3 of that operand (the inserted constant). The expected output makes
; the shuffle read %x directly; %ins is kept only for the call to @use.
define <4 x float> @insert_undemanded_element_op0(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @insert_undemanded_element_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 7, i32 1, i32 4>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%ins = insertelement <4 x float> %x, float 42.0, i32 3
call void @use(<4 x float> %ins)
%s = shufflevector <4 x float> %ins, <4 x float> %y, <4 x i32> <i32 0, i32 7, i32 1, i32 4>
ret <4 x float> %s
}
; The shuffle's second operand is %ins, but its mask <3, 2, 1, 4> reads only
; lane 0 of that operand, never the inserted lane 3. The expected output makes
; the shuffle read %x directly; %ins is kept only for the call to @use.
define <4 x float> @insert_undemanded_element_op1(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @insert_undemanded_element_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <4 x i32> <i32 3, i32 2, i32 1, i32 4>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%ins = insertelement <4 x float> %x, float 42.0, i32 3
call void @use(<4 x float> %ins)
%s = shufflevector <4 x float> %y, <4 x float> %ins, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
ret <4 x float> %s
}
; Negative test - shuffle chooses the inserted constant.
; Mask index 3 reads lane 3 of the first operand %ins, which is the inserted
; 42.0; the expected output is identical to the input.
define <4 x float> @insert_demanded_element_op0(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @insert_demanded_element_op0(
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <4 x i32> <i32 3, i32 2, i32 1, i32 4>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%ins = insertelement <4 x float> %x, float 42.0, i32 3
call void @use(<4 x float> %ins)
%s = shufflevector <4 x float> %ins, <4 x float> %y, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
ret <4 x float> %s
}
; Negative test - shuffle chooses the inserted constant.
; Mask index 7 reads lane 3 of the second operand %ins, which is the inserted
; 43.0; the expected output is identical to the input.
define <4 x float> @insert_demanded_element_op1(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @insert_demanded_element_op1(
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.300000e+01, i32 3
; CHECK-NEXT: call void @use(<4 x float> [[INS]])
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <4 x i32> <i32 0, i32 7, i32 1, i32 4>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%ins = insertelement <4 x float> %x, float 43.0, i32 3
call void @use(<4 x float> %ins)
%s = shufflevector <4 x float> %y, <4 x float> %ins, <4 x i32> <i32 0, i32 7, i32 1, i32 4>
ret <4 x float> %s
}
; %splat3 broadcasts lane 3 of %ins3, which holds the inserted constant 3.0.
; The expected output replaces the splat with a constant vector of 3.0 as the
; second fadd operand.
define <4 x float> @splat_constant(<4 x float> %x) {
; CHECK-LABEL: @splat_constant(
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i32 3
; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%ins3 = insertelement <4 x float> %x, float 3.0, i32 3
%splat3 = shufflevector <4 x float> %ins3, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%r = fadd <4 x float> %ins3, %splat3
ret <4 x float> %r
}

View File

@ -0,0 +1,271 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; Declarations of the masked load, store, gather and scatter intrinsics that
; the functions below call. In every call in this file the i32 operand is a
; literal (1, 2, 4 or 8) and the <N x i1> operand is the per-lane mask; the
; load and gather forms additionally take a pass-through vector.
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
; Masked load with an all-false (zeroinitializer) mask. The expected output
; returns the %passthru argument directly, with no load left.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_zeromask(
; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
}
; Masked load with an all-true mask and i32 operand 2. The expected output is
; an ordinary vector load with align 2; %passthru is no longer used.
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
ret <2 x double> %res
}
; Masked load whose mask is <true, undef>. The expected output is the same
; ordinary align-2 vector load as in the all-true case.
define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_undefmask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
ret <2 x double> %res
}
; External global whose address is used below to build a mask lane that is a
; constant expression rather than a literal true/false.
@G = external global i8
; Masked load whose second mask lane is `ptrtoint (i8* @G to i1)`. The
; expected output keeps the intrinsic call with the same operands (the
; literal `i1 1` is printed as `i1 true`).
define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_cemask(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
ret <2 x double> %res
}
; Masked load with constant mask <true, false>; the pass-through vector is
; built by inserting %pt into lanes 0 and 1 of poison. The expected output
; keeps only the lane-1 insertelement for the pass-through (its base vector
; is printed as undef) and leaves the masked load call in place.
define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
; CHECK-LABEL: @load_lane0(
; CHECK-NEXT: [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
ret <2 x double> %res
}
; Four-lane masked gather from %base + <0, 1, 2, 3> with mask
; <true, false, true, true> and an undef pass-through; only lane 2 of the
; result is returned. The expected output changes the GEP index of lane 1
; (the masked-off lane) to undef and otherwise keeps the gather and the
; extractelement. %pt is not used by the body.
define double @load_all(double* %base, double %pt) {
; CHECK-LABEL: @load_all(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
; CHECK-NEXT: ret double [[ELT]]
;
%ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
%res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
%elt = extractelement <4 x double> %res, i64 2
ret double %elt
}
; Masked load with a variable mask and a pointer that carries no attributes.
; The expected output keeps the intrinsic call; the only change is that the
; two-insertelement pass-through is rewritten as one insertelement followed
; by a zero-mask (splat) shufflevector.
define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_generic(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}
; Masked load with a variable mask from a pointer marked dereferenceable(16)
; align 4, i.e. the whole <2 x double> is known dereferenceable. The expected
; output replaces the intrinsic with an unconditional align-4 vector load
; followed by a select on %mask between the loaded value and the
; pass-through splat.
define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}
; Same as load_speculative except that %ptr has dereferenceable(16) but no
; align attribute. The expected output is still an unconditional vector load
; using the intrinsic's alignment operand (align 4) followed by a select on
; %mask.
define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}
; Can't speculate since only half of required size is known deref
; %ptr is dereferenceable(8) while the <2 x double> access covers 16 bytes.
; The expected output keeps the masked load call (with nonnull added to the
; pointer argument) instead of turning it into a plain load plus select.
define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}
; Can only speculate one lane (but it's the only one active)
; %ptr is dereferenceable(8) and lane 1 of the mask is forced to false by an
; insertelement, so only lane 0 can be active. The expected output recorded
; here still keeps the masked load call (with nonnull on the pointer) rather
; than a plain load plus select.
define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
ret <2 x double> %res
}
; Masked store with an all-false mask. The expected output contains no store
; at all, only the return.
define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_zeromask(
; CHECK-NEXT: ret void
;
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
ret void
}
; Masked store with an all-true mask and i32 operand 4. The expected output
; is an ordinary vector store with align 4.
define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT: store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
ret void
}
; Masked store with constant mask <true, false> of a vector built by
; inserting %val into lanes 0 and 1. The expected output drops the lane-1
; insertelement (that lane is never stored) and stores the one-insert vector
; through the unchanged intrinsic call.
define void @store_demandedelts(<2 x double>* %ptr, double %val) {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
%valvec1 = insertelement <2 x double> poison, double %val, i32 0
%valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
ret void
}
; Masked gather where pointers, mask and pass-through are all function
; arguments. The expected output is the call unchanged.
define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) {
; CHECK-LABEL: @gather_generic(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
ret <2 x double> %res
}
; Masked gather with an all-false mask. The expected output returns
; %passthru directly, with no gather left.
define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_zeromask(
; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
}
; Masked gather with an all-true mask. The expected output keeps the gather
; call but replaces the %passthru operand with undef, since no lane can take
; the pass-through value.
define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_onemask(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
ret <2 x double> %res
}
; Four-lane masked gather in which only lane 2 of the constant mask is true;
; the pass-through is a splat of %pt. The expected output sets the GEP
; indices of the three masked-off lanes to undef, and sets lane 2 of the
; pass-through splat's shuffle mask to undef (that lane always comes from
; memory).
define <4 x double> @gather_lane2(double* %base, double %pt) {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
%pt_v1 = insertelement <4 x double> poison, double %pt, i64 0
%pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
%res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2)
ret <4 x double> %res
}
; Two-lane masked gather whose mask is the variable %mask with lane 1 forced
; to false, so only lane 0 may be active. The expected output keeps the GEP
; indices <0, 1>, the mask insertelement and the gather call; the only
; rewrite is the pass-through becoming an insertelement plus splat shuffle.
define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
%pt_v1 = insertelement <2 x double> poison, double %pt, i64 0
%pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
ret <2 x double> %res
}
; Two-lane masked gather with lane 1 of the variable mask forced to false.
; The expected output keeps the gather call and only rewrites the
; pass-through into an insertelement plus splat shuffle.
; NOTE(review): the signature and body are identical to gather_lane0_maybe
; apart from the function name; the "_spec" suffix suggests a variant with
; extra pointer attributes may have been intended - confirm.
define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe_spec(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
%pt_v1 = insertelement <2 x double> poison, double %pt, i64 0
%pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
ret <2 x double> %res
}
; Masked scatter with an all-false mask. The expected output contains no
; scatter at all, only the return.
define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
; CHECK-LABEL: @scatter_zeromask(
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer)
ret void
}
; Masked scatter with constant mask <true, false>. Both the pointer vector
; (GEP with indices <0, 1>) and the value vector (two insertelements) have a
; lane that is never written. The expected output sets the lane-1 GEP index
; to undef and drops the lane-1 insertelement of the value.
define void @scatter_demandedelts(double* %ptr, double %val) {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef>
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0
; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
%ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
%valvec1 = insertelement <2 x double> poison, double %val, i32 0
%valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
ret void
}

View File

@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Module-level setup shared by the two PR38984 functions below: a data layout
; string that specifies 16-bit pointers, and two external i16 arrays that are
; referenced through constant getelementptr expressions.
target datalayout = "p:16:16"
@a = external global [21 x i16], align 1
@offsets = external global [4 x i16], align 1
; The "same gep" optimization should work with vector icmp.
; %2 and %3 are identical vector GEPs (same null base, same index vector), so
; comparing them for equality is expected to fold to an all-true <4 x i1>
; constant, leaving no load or GEP in the output.
define <4 x i1> @PR38984_1() {
; CHECK-LABEL: @PR38984_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
%1 = insertelement <4 x i16> poison, i16 %0, i32 3
%2 = getelementptr i32, i32* null, <4 x i16> %1
%3 = getelementptr i32, i32* null, <4 x i16> %1
%4 = icmp eq <4 x i32*> %2, %3
ret <4 x i1> %4
}
; The "compare base pointers" optimization should not kick in for vector icmp.
; Here the two vector GEPs share the index vector but have different bases
; (an element of @a versus null). The expected output keeps the load, both
; GEPs and the vector icmp; the visible changes are an explicit align 2 on
; the load and the constant GEP into @a being printed with inbounds and i16
; indices.
define <4 x i1> @PR38984_2() {
; CHECK-LABEL: @PR38984_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret <4 x i1> [[TMP4]]
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
%1 = insertelement <4 x i16> poison, i16 %0, i32 3
%2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
%3 = getelementptr i16, i16* null, <4 x i16> %1
%4 = icmp eq <4 x i16*> %2, %3
ret <4 x i1> %4
}

View File

@ -0,0 +1,335 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; Loads a <4 x i32> and returns lane 1. The expected output is identical to
; the input: the vector load followed by the extractelement.
define i32 @extract_load(<4 x i32>* %p) {
; CHECK-LABEL: @extract_load(
; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
; CHECK-NEXT: ret i32 [[EXT]]
;
%x = load <4 x i32>, <4 x i32>* %p, align 4
%ext = extractelement <4 x i32> %x, i32 1
ret i32 %ext
}
; Floating-point variant: loads a <4 x double> with align 32 and returns
; lane 3. The expected output is identical to the input.
define double @extract_load_fp(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p, align 32
%ext = extractelement <4 x double> %x, i32 3
ret double %ext
}
; Volatile vector load followed by an extract of lane 2. The expected output
; keeps the volatile vector load (printed with an explicit align 32) and the
; extractelement.
define double @extract_load_volatile(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
; CHECK-NEXT: ret double [[EXT]]
;
%x = load volatile <4 x double>, <4 x double>* %p
%ext = extractelement <4 x double> %x, i32 2
ret double %ext
}
; The loaded vector has two users: the extract of lane 0 and a store of the
; whole vector to %p2. The expected output keeps the vector load, the
; extractelement and the store (the store is printed with align 32).
define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p, align 8
%ext = extractelement <4 x double> %x, i32 0
store <4 x double> %x, <4 x double>* %p2
ret double %ext
}
; Vector load followed by an extract at the run-time index %y. The expected
; output keeps both instructions; the load is printed with align 32.
define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p
%ext = extractelement <4 x double> %x, i32 %y
ret double %ext
}
; Loop carrying a <4 x float> phi that starts as a splat of a volatile-loaded
; scalar and is multiplied by a constant splat on each iteration; the only
; reader of the vector is an extract of lane 1 that is stored back.
; The expected output carries a scalar float phi and a scalar fmul instead of
; the vector forms, with no insertelement, shufflevector or extractelement
; left. It also shows the loop compare as `icmp eq` with the branch targets
; swapped, and the induction add gaining nuw. %insert1 is never used.
define void @scalarize_phi(i32 * %n, float * %inout) {
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.body:
; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4
; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%t0 = load volatile float, float * %inout, align 4
%insert = insertelement <4 x float> poison, float %t0, i32 0
%splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
%insert1 = insertelement <4 x float> poison, float 3.0, i32 0
br label %for.cond
for.cond:
%x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%t1 = load i32, i32 * %n, align 4
%cmp = icmp ne i32 %i.0, %t1
br i1 %cmp, label %for.body, label %for.end
for.body:
%t2 = extractelement <4 x float> %x.0, i32 1
store volatile float %t2, float * %inout, align 4
%mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end:
ret void
}
; Vector fadd with a splat constant, then an extract of constant lane 2. The
; expected output extracts lane 2 of %x first and performs a scalar fadd with
; the splat value.
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT: ret float [[R]]
;
%b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
%r = extractelement <4 x float> %b, i32 2
ret float %r
}
; Vector fdiv whose constant numerator is <42.0, undef>, then an extract of
; constant lane 0. The expected output is a scalar fdiv of 42.0 by lane 0 of
; %x; the undef lane is not involved.
define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT: ret double [[R]]
;
%b = fdiv <2 x double> <double 42.0, double undef>, %x
%r = extractelement <2 x double> %b, i32 0
ret double %r
}
; Vector fmul with the non-splat constant <42.0, 43.0>, then an extract of
; constant lane 1. The expected output is a scalar fmul of lane 1 of %x by
; 43.0, the constant's lane-1 element.
define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT: ret float [[R]]
;
%b = fmul <2 x float> %x, <float 42.0, float 43.0>
%r = extractelement <2 x float> %b, i32 1
ret float %r
}
; Vector sdiv by a full splat of 42, then an extract at the run-time index
; %y. The expected output extracts lane %y of %x and performs a scalar sdiv
; by 42.
define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT: ret i8 [[R]]
;
%b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}
; Vector mul by a constant that is a splat of 42 except for an undef lane,
; then an extract at the run-time index %y. The expected output is identical
; to the input: the vector mul is kept and not scalarized.
define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}
; Vector lshr by the non-splat constant <4, 3, undef, 2>, then an extract at
; the run-time index %y. The expected output is identical to the input: the
; vector shift is kept and not scalarized.
define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}
; Vector fadd of %x and a loaded vector, then an extract of lane 2. The load
; has no other user. The expected output keeps the vector load (printed with
; align 16), extracts lane 2 from both the loaded vector and %x, and adds the
; two scalars.
define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, <4 x float>* %ptr
%add = fadd <4 x float> %x, %load
%r = extractelement <4 x float> %add, i32 2
ret float %r
}
; Like extract_element_load, but the loaded vector is also stored to %ptr1,
; giving it a second user. The expected output keeps the fadd as a vector
; operation (with its operands commuted) and extracts lane 2 afterwards.
define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, <4 x float>* %ptr0
store <4 x float> %load, <4 x float>* %ptr1
%add = fadd <4 x float> %x, %load
%r = extractelement <4 x float> %add, i32 2
ret float %r
}
; Vector fadd with a splat of 1.0, then an extract at the run-time index %y.
; The expected output extracts lane %y of %x and performs a scalar fadd with
; 1.0.
define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT: ret float [[R]]
;
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
%r = extractelement <4 x float> %add, i32 %y
ret float %r
}
; %f is inserted into lane 0 of %A, the result is multiplied by %B with the
; nnan flag, and lane 0 of the product is returned. The expected output is a
; scalar `fmul nnan` of lane 0 of %B and %f; %A and the insertelement no
; longer appear, and the nnan flag is kept.
define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
; CHECK-NEXT: ret float [[E]]
;
%C = insertelement <4 x float> %A, float %f, i32 0
%D = fmul nnan <4 x float> %C, %B
%E = extractelement <4 x float> %D, i32 0
ret float %E
}
; We recurse to find a scalarizable operand.
; FIXME: We should propagate the IR flags including wrapping flags.
; The extracted lane 0 flows through two vector binops (add, then mul nsw)
; back to an insertelement of %f into lane 0. The expected output performs
; both operations on scalars, extracting lane 0 of %B twice. The scalar mul
; in the expected output has no nsw flag even though the input mul does,
; which is the flag loss the FIXME above refers to.
define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
; CHECK-NEXT: [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[E]]
;
%v = insertelement <4 x i32> %A, i32 %f, i32 0
%C = add <4 x i32> %v, %B
%D = mul nsw <4 x i32> %C, %B
%E = extractelement <4 x i32> %D, i32 0
ret i32 %E
}
; Extract at the run-time index %y from the constant vector <1.0, 2.0, 3.0,
; 4.0>. The expected output is identical to the input.
define float @extract_element_constant_vector_variable_index(i32 %y) {
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT: ret float [[R]]
;
%r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
ret float %r
}
; Vector `icmp eq %x, 0` and-ed with the mask vector %y, then an extract of
; lane 2. The expected output extracts lane 2 of %x and of %y and performs
; the compare and the `and` on scalars.
define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
%cmp = icmp eq <4 x i32> %x, zeroinitializer
%and = and <4 x i1> %cmp, %y
%r = extractelement <4 x i1> %and, i32 2
ret i1 %r
}
; Floating-point counterpart of cheap_to_extract_icmp: vector `fcmp oeq %x,
; 0.0` and-ed with %y, then an extract of lane 2. The expected output does
; the compare and the `and` on the extracted lane-2 scalars.
define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
%cmp = fcmp oeq <4 x float> %x, zeroinitializer
%and = and <4 x i1> %cmp, %y
%r = extractelement <4 x i1> %and, i32 2
ret i1 %r
}
; Vector integer compare against a constant zero vector, then an extract of
; constant lane 0. The expected output extracts lane 0 of %arg and compares
; the scalar with 0.
define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}
; Vector floating-point compare against a constant zero vector, then an
; extract of constant lane 0. The expected output extracts lane 0 of %arg and
; compares the scalar with 0.0 using the same oeq predicate.
define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = fcmp oeq <2 x float> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}
; Same as extractelt_vector_icmp_constrhs but the extract index is the
; run-time value %idx. The expected output still extracts the lane from %arg
; first and compares the scalar with 0.
define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 %idx
ret i1 %ext
}
; Same as extractelt_vector_fcmp_constrhs but the extract index is the
; run-time value %idx. The expected output still extracts the lane from %arg
; first and compares the scalar with 0.0.
define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = fcmp oeq <2 x float> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 %idx
ret i1 %ext
}
; The fcmp operand %add has a second user (a volatile store to an undef
; pointer). The expected output keeps the fadd, the store and the vector fcmp
; (with its operands swapped) and extracts lane 0 from the vector compare
; result; nothing is scalarized.
; NOTE(review): the %idx parameter is not used - the extract index is the
; constant 0.
define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%add = fadd <2 x float> %arg1, %arg2
store volatile <2 x float> %add, <2 x float>* undef
%cmp = fcmp oeq <2 x float> %arg0, %add
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}

View File

@ -0,0 +1,213 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
; External function taking a <4 x float>. The *_user tests in this file call
; it to give a vector select a second, non-extractelement use.
declare void @v4float_user(<4 x float>) #0
; Scalar-condition select between two vectors with one extractelement user.
; The expected output inverts the compare (ne becomes eq) and swaps the select
; operands; the extract still reads lane 2 of the vector select.
define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
ret float %extract
}
; Multiple extractelements
; Lanes 1 and 2 of the select are reassembled into a <2 x float>. The expected
; output replaces the extract/insert chain with one shufflevector of the select
; using the mask <1, 2>.
define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_two_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> poison, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}
; Select has an extra non-extractelement user, don't change it
; The extra user is the call to @v4float_user. The expected output keeps the
; vector select and the lane-2 extract; only the compare is inverted and the
; select operands swapped.
define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}
; Vector-condition variant of extract_one_select_user. The select condition is
; a <4 x i1> compare and the select is also passed to @v4float_user. The
; expected output keeps the vector select and the lane-2 extract.
define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}
; Do not convert the vector select into a scalar select. That would increase
; the instruction count and potentially obfuscate a vector min/max idiom.
; The expected output keeps the <4 x i1> select and extracts lane 0 from it.
define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %select, i32 0
ret float %extract
}
; Multiple extractelements from a vector select
; Lanes 1 and 2 are reassembled into a <2 x float>. The expected output keeps
; the vector select and replaces the extract/insert chain with one
; shufflevector using the mask <1, 2>.
define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_two_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> poison, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}
; The vector selects are not decomposed into scalar selects because that would increase
; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
; Test multiple extractelements
; Result lane i is taken from lane i of a separate select whose scalar
; condition is lane i of %c compared with 0. The expected output keeps the four
; selects and merges them with three shufflevectors in place of the
; extract/insert chain.
define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_vector_select(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[A_SINK:%.*]] = select i1 [[TOBOOL_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
;
entry:
%0 = extractelement <4 x i32> %c, i32 0
%tobool = icmp ne i32 %0, 0
%a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
%1 = extractelement <4 x float> %a.sink, i32 0
%2 = insertelement <4 x float> poison, float %1, i32 0
%3 = extractelement <4 x i32> %c, i32 1
%tobool1 = icmp ne i32 %3, 0
%a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
%4 = extractelement <4 x float> %a.sink1, i32 1
%5 = insertelement <4 x float> %2, float %4, i32 1
%6 = extractelement <4 x i32> %c, i32 2
%tobool6 = icmp ne i32 %6, 0
%a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
%7 = extractelement <4 x float> %a.sink2, i32 2
%8 = insertelement <4 x float> %5, float %7, i32 2
%9 = extractelement <4 x i32> %c, i32 3
%tobool11 = icmp ne i32 %9, 0
%a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
%10 = extractelement <4 x float> %a.sink3, i32 3
%11 = insertelement <4 x float> %8, float %10, i32 3
ret <4 x float> %11
}
; Vector select whose scalar condition is lane 3 of %condv. The expected
; output splats lane 3 of %condv with a shufflevector and uses the resulting
; <4 x i1> as a vector select condition.
define <4 x i32> @extract_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; The select condition is already an explicit splat of lane 3 of %condv.
; The expected output has the same shufflevector and select as the input.
define <4 x i32> @splat_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @splat_cond(
; CHECK-NEXT: [[SPLATCOND:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[SPLATCOND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%splatcond = shufflevector <4 x i1> %condv, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%r = select <4 x i1> %splatcond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; External function taking an i1; used to give the extracted condition a
; second use.
declare void @extra_use(i1)
; Negative test
; The extracted condition is also passed to @extra_use. The expected output
; keeps the extractelement and the scalar-condition select.
define <4 x i32> @extract_cond_extra_use(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond_extra_use(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 3
; CHECK-NEXT: call void @extra_use(i1 [[COND]])
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
call void @extra_use(i1 %cond)
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; Negative test
; The lane index is the variable %index rather than a constant. The expected
; output keeps the extractelement and the scalar-condition select.
define <4 x i32> @extract_cond_variable_index(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv, i32 %index) {
; CHECK-LABEL: @extract_cond_variable_index(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 %index
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; IR shuffle can alter the number of elements in the vector, so this is ok.
; The condition comes from lane 1 of a <5 x i1> vector while the selected
; values have 4 lanes. The expected output splats lane 1 into a <4 x i1> mask
; and uses a vector-condition select.
define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1> %condv) {
; CHECK-LABEL: @extract_cond_type_mismatch(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <5 x i1> [[CONDV:%.*]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <5 x i1> %condv, i32 1
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that these instructions are properly eliminated.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
; shl of the constant 6 by (zext(%A) + 5). The expected output drops the add
; and shifts the constant 192 (6 shifted left by 5) by the zext alone.
define i32 @shl_C1_add_A_C2_i32(i16 %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_i32(
; CHECK-NEXT: [[B:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
; CHECK-NEXT: ret i32 [[D]]
;
%B = zext i16 %A to i32
%C = add i32 %B, 5
%D = shl i32 6, %C
ret i32 %D
}
; ashr of the constant 6 by ((%A & 65535) + 5). The expected output is the
; constant 0 with no remaining instructions.
define i32 @ashr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
; CHECK-NEXT: ret i32 0
;
%B = and i32 %A, 65535
%C = add i32 %B, 5
%D = ashr i32 6, %C
ret i32 %D
}
; Shift of the constant 6 by ((%A & 65535) + 5). The expected output drops the
; add and shifts the constant 192 by the masked value alone.
; NOTE(review): despite the lshr_ name, the body and the expected output use
; shl, so no scalar lshr is exercised here - confirm whether lshr was intended.
define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 65535
; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
; CHECK-NEXT: ret i32 [[D]]
;
%B = and i32 %A, 65535
%C = add i32 %B, 5
%D = shl i32 6, %C
ret i32 %D
}
; Vector shl with per-lane add constants <0, 1, 50, 16>. The expected output
; drops the add and shifts the pre-shifted constant vector
; <6, 4, poison, -458752> by the zext alone. The poison lane is the one whose
; add constant (50) is not smaller than the 32-bit element width.
define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = zext <4 x i16> %A to <4 x i32>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}
; Vector ashr with per-lane add constants <0, 1, 50, 16> applied to a masked
; input. The expected output drops the add and shifts the pre-shifted constant
; vector <6, 1, poison, -1> by the masked value alone. The poison lane is the
; one whose add constant (50) is not smaller than the 32-bit element width.
define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}
; Vector lshr with per-lane add constants <0, 1, 50, 16> applied to a masked
; input. The expected output drops the add and shifts the pre-shifted constant
; vector <6, 1, poison, 65535> by the masked value alone. The poison lane is
; the one whose add constant (50) is not smaller than the 32-bit element width.
define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}
; Like shl_C1_add_A_C2_v4i32, but the variable part of the shift amount is a
; splat of zext(%I) built with insertelement into poison plus a zero-mask
; shufflevector. The expected output keeps the splat, drops the add and uses
; the pre-shifted constant vector <6, 4, poison, -458752>.
define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}
; Like ashr_C1_add_A_C2_v4i32, but the variable part of the shift amount is a
; splat of zext(%I) built with insertelement into poison plus a zero-mask
; shufflevector. The expected output keeps the splat, drops the add and uses
; the pre-shifted constant vector <6, 1, poison, -1>.
define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}
; Like lshr_C1_add_A_C2_v4i32, but the variable part of the shift amount is a
; splat of zext(%I) built with insertelement into poison plus a zero-mask
; shufflevector. The expected output keeps the splat, drops the add and uses
; the pre-shifted constant vector <6, 1, poison, 65535>.
define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}

View File

@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -o - | FileCheck %s
; This test case was added as a reproducer for a miscompile, where instcombine
; introduced an
; srem <2 x i16> %1, <i16 undef, i16 2>
; instruction, which makes the whole srem undefined (even if we only end up
; extracting the second element in the vector).
; In the expected output below the srem divisor is <i16 2, i16 1>, i.e. it has
; no undef lane. The srem is applied to the un-splatted insertelement and the
; result is then moved to lane 1 by the shufflevector.
define i16 @test_srem_orig(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_srem_orig(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 undef, i16 1>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[T2]], i32 1
; CHECK-NEXT: ret i16 [[T3]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%splat = shufflevector <2 x i16> %splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
%t1 = select i1 %cmp, <2 x i16> <i16 1, i16 1>, <2 x i16> %splat
%t2 = srem <2 x i16> %t1, <i16 2, i16 2>
%t3 = extractelement <2 x i16> %t2, i32 1
ret i16 %t3
}
; This is basically a reduced version of test_srem_orig (based on what the
; code would look like after a few iterations of instcombine, just before we
; try to transform the shufflevector by doing
; "evaluateInDifferentElementOrder".
; The expected output keeps the srem with divisor <i16 2, i16 1>, the
; shufflevector and the select exactly as they appear in the input.
define <2 x i16> @test_srem(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_srem(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = srem <2 x i16> %splatinsert, <i16 2, i16 1>
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}
; Same shape as test_srem but with urem and divisor <i16 3, i16 1>. The
; expected output keeps the urem, the shufflevector and the select as in the
; input.
define <2 x i16> @test_urem(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_urem(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], <i16 3, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = urem <2 x i16> %splatinsert, <i16 3, i16 1>
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}
; Same shape as test_srem but with sdiv and divisor <i16 2, i16 1>. The
; expected output keeps the sdiv, the shufflevector and the select as in the
; input.
define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_sdiv(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = sdiv <2 x i16> %splatinsert, <i16 2, i16 1>
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}
; Same shape as test_srem but with udiv and divisor <i16 3, i16 1>. The
; expected output keeps the udiv, the shufflevector and the select as in the
; input.
define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_udiv(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], <i16 3, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = udiv <2 x i16> %splatinsert, <i16 3, i16 1>
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}
; For fdiv we do not need to worry about div by undef. Verify that the
; shufflevector is eliminated here.
; The expected output inserts %a directly into lane 1 and divides by
; <undef, 3.0>, so the divisor does contain an undef lane. The %b parameter is
; not used by the body.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
%denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1
%t1 = fdiv <2 x float> %splatinsert, %denom
%splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op
ret <2 x float> %t2
}
; For frem we do not need to worry about div by undef. Verify that the
; shufflevector is eliminated here.
; The expected output inserts %a directly into lane 1 and takes the remainder
; by <undef, 3.0>, so the divisor does contain an undef lane. The %b parameter
; is not used by the body.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
%denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1
%t1 = frem <2 x float> %splatinsert, %denom
%splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op
ret <2 x float> %t2
}

View File

@ -0,0 +1,195 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE
; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE
; trunc to i32 of element 0 of a <3 x i64>. The expected output bitcasts the
; vector to <6 x i32> and extracts lane 0 in the little-endian run and lane 1
; in the big-endian run.
define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i32_0(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 0
; LE-NEXT: ret i32 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i32_0(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 1
; BE-NEXT: ret i32 [[T]]
;
%e = extractelement <3 x i64> %x, i32 0
%t = trunc i64 %e to i32
ret i32 %t
}
; Scalable-vector variant of shrinkExtractElt_i64_to_i32_0. The expected output
; bitcasts <vscale x 3 x i64> to <vscale x 6 x i32> and extracts lane 0 in the
; little-endian run and lane 1 in the big-endian run.
define i32 @vscale_shrinkExtractElt_i64_to_i32_0(<vscale x 3 x i64> %x) {
; LE-LABEL: @vscale_shrinkExtractElt_i64_to_i32_0(
; LE-NEXT: [[TMP1:%.*]] = bitcast <vscale x 3 x i64> [[X:%.*]] to <vscale x 6 x i32>
; LE-NEXT: [[T:%.*]] = extractelement <vscale x 6 x i32> [[TMP1]], i32 0
; LE-NEXT: ret i32 [[T]]
;
; BE-LABEL: @vscale_shrinkExtractElt_i64_to_i32_0(
; BE-NEXT: [[TMP1:%.*]] = bitcast <vscale x 3 x i64> [[X:%.*]] to <vscale x 6 x i32>
; BE-NEXT: [[T:%.*]] = extractelement <vscale x 6 x i32> [[TMP1]], i32 1
; BE-NEXT: ret i32 [[T]]
;
%e = extractelement <vscale x 3 x i64> %x, i32 0
%t = trunc i64 %e to i32
ret i32 %t
}
; trunc to i32 of element 1 of a <3 x i64>. The expected output bitcasts the
; vector to <6 x i32> and extracts lane 2 in the little-endian run and lane 3
; in the big-endian run.
define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i32_1(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 2
; LE-NEXT: ret i32 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i32_1(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 3
; BE-NEXT: ret i32 [[T]]
;
%e = extractelement <3 x i64> %x, i32 1
%t = trunc i64 %e to i32
ret i32 %t
}
; trunc to i32 of element 2 of a <3 x i64>. The expected output bitcasts the
; vector to <6 x i32> and extracts lane 4 in the little-endian run and lane 5
; in the big-endian run.
define i32 @shrinkExtractElt_i64_to_i32_2(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i32_2(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 4
; LE-NEXT: ret i32 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i32_2(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 5
; BE-NEXT: ret i32 [[T]]
;
%e = extractelement <3 x i64> %x, i32 2
%t = trunc i64 %e to i32
ret i32 %t
}
; trunc to i16 of element 0 of a <3 x i64>; the input uses an i16 index. The
; expected output bitcasts the vector to <12 x i16> and extracts lane 0 in the
; little-endian run and lane 3 in the big-endian run, using an i32 index.
define i16 @shrinkExtractElt_i64_to_i16_0(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i16_0(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 0
; LE-NEXT: ret i16 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i16_0(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 3
; BE-NEXT: ret i16 [[T]]
;
%e = extractelement <3 x i64> %x, i16 0
%t = trunc i64 %e to i16
ret i16 %t
}
; trunc to i16 of element 1 of a <3 x i64>; the input uses an i16 index. The
; expected output bitcasts the vector to <12 x i16> and extracts lane 4 in the
; little-endian run and lane 7 in the big-endian run, using an i32 index.
define i16 @shrinkExtractElt_i64_to_i16_1(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i16_1(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 4
; LE-NEXT: ret i16 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i16_1(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 7
; BE-NEXT: ret i16 [[T]]
;
%e = extractelement <3 x i64> %x, i16 1
%t = trunc i64 %e to i16
ret i16 %t
}
; trunc to i16 of element 2 of a <3 x i64>; the input uses an i16 index. The
; expected output bitcasts the vector to <12 x i16> and extracts lane 8 in the
; little-endian run and lane 11 in the big-endian run, using an i32 index.
define i16 @shrinkExtractElt_i64_to_i16_2(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i16_2(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 8
; LE-NEXT: ret i16 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i64_to_i16_2(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 11
; BE-NEXT: ret i16 [[T]]
;
%e = extractelement <3 x i64> %x, i16 2
%t = trunc i64 %e to i16
ret i16 %t
}
; Crazy types may be ok.
; trunc from i33 to i11 of element 2 of a <3 x i33>. The expected output
; bitcasts the vector to <9 x i11> and extracts lane 6 in the little-endian run
; and lane 8 in the big-endian run.
define i11 @shrinkExtractElt_i33_to_11_2(<3 x i33> %x) {
; LE-LABEL: @shrinkExtractElt_i33_to_11_2(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i33> [[X:%.*]] to <9 x i11>
; LE-NEXT: [[T:%.*]] = extractelement <9 x i11> [[TMP1]], i32 6
; LE-NEXT: ret i11 [[T]]
;
; BE-LABEL: @shrinkExtractElt_i33_to_11_2(
; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i33> [[X:%.*]] to <9 x i11>
; BE-NEXT: [[T:%.*]] = extractelement <9 x i11> [[TMP1]], i32 8
; BE-NEXT: ret i11 [[T]]
;
%e = extractelement <3 x i33> %x, i16 2
%t = trunc i33 %e to i11
ret i11 %t
}
; Do not optimize if it would result in an invalid bitcast instruction.
; A <3 x i67> is 201 bits wide, which is not a multiple of 13. The expected
; output is the same for both data layouts and keeps the extractelement (with
; its i459 index) followed by the trunc.
define i13 @shrinkExtractElt_i67_to_i13_2(<3 x i67> %x) {
; ANY-LABEL: @shrinkExtractElt_i67_to_i13_2(
; ANY-NEXT: [[E:%.*]] = extractelement <3 x i67> [[X:%.*]], i459 2
; ANY-NEXT: [[T:%.*]] = trunc i67 [[E]] to i13
; ANY-NEXT: ret i13 [[T]]
;
%e = extractelement <3 x i67> %x, i459 2
%t = trunc i67 %e to i13
ret i13 %t
}
; Do not optimize if the bitcast instruction would be valid, but the
; transform would be wrong.
; A <3 x i40> is 120 bits wide, the same as <4 x i30>, but 40 is not a multiple
; of 30 (presumably why no single i30 lane lines up with the truncated value;
; confirm against the transform's guard). The expected output is the same for
; both data layouts and keeps the extractelement followed by the trunc.
define i30 @shrinkExtractElt_i40_to_i30_1(<3 x i40> %x) {
; ANY-LABEL: @shrinkExtractElt_i40_to_i30_1(
; ANY-NEXT: [[E:%.*]] = extractelement <3 x i40> [[X:%.*]], i32 1
; ANY-NEXT: [[T:%.*]] = trunc i40 [[E]] to i30
; ANY-NEXT: ret i30 [[T]]
;
%e = extractelement <3 x i40> %x, i32 1
%t = trunc i40 %e to i30
ret i30 %t
}
; Do not canonicalize if that would increase the instruction count.
declare void @use(i64)
; The extracted i64 is also passed to @use. The expected output is the same
; for both data layouts and keeps the extractelement, the call and the trunc.
define i16 @shrinkExtractElt_i64_to_i16_2_extra_use(<3 x i64> %x) {
; ANY-LABEL: @shrinkExtractElt_i64_to_i16_2_extra_use(
; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i64 2
; ANY-NEXT: call void @use(i64 [[E]])
; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i16
; ANY-NEXT: ret i16 [[T]]
;
%e = extractelement <3 x i64> %x, i64 2
call void @use(i64 %e)
%t = trunc i64 %e to i16
ret i16 %t
}
; Check to ensure PR45314 remains fixed.
; Element 0 of %x is truncated to i32, splatted across an <8 x i32> and bitcast
; back to <4 x i64>. The expected output bitcasts %x to <8 x i32> first and
; then splats lane 0 in the little-endian run and lane 1 in the big-endian run.
define <4 x i64> @PR45314(<4 x i64> %x) {
; LE-LABEL: @PR45314(
; LE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32>
; LE-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
; LE-NEXT: [[B:%.*]] = bitcast <8 x i32> [[S]] to <4 x i64>
; LE-NEXT: ret <4 x i64> [[B]]
;
; BE-LABEL: @PR45314(
; BE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32>
; BE-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; BE-NEXT: [[B:%.*]] = bitcast <8 x i32> [[S]] to <4 x i64>
; BE-NEXT: ret <4 x i64> [[B]]
;
%e = extractelement <4 x i64> %x, i32 0
%t = trunc i64 %e to i32
%i = insertelement <8 x i32> poison, i32 %t, i32 0
%s = shufflevector <8 x i32> %i, <8 x i32> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i32> %s to <4 x i64>
ret <4 x i64> %b
}

View File

@ -0,0 +1,27 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
; This vscale udiv with a power-of-2 splat on the rhs should not crash opt
; The rhs is a splat of the constant 2, built with insertelement into poison
; followed by a zero-mask shufflevector. The only expectation is that the
; function definition line is present in the output.
; CHECK: define <vscale x 2 x i32> @udiv_pow2_vscale(<vscale x 2 x i32> %lhs)
define <vscale x 2 x i32> @udiv_pow2_vscale(<vscale x 2 x i32> %lhs) {
%splatter = insertelement <vscale x 2 x i32> poison, i32 2, i32 0
%rhs = shufflevector <vscale x 2 x i32> %splatter,
<vscale x 2 x i32> undef,
<vscale x 2 x i32> zeroinitializer
%res = udiv <vscale x 2 x i32> %lhs, %rhs
ret <vscale x 2 x i32> %res
}
; This fixed width udiv with a power-of-2 splat on the rhs should also not
; crash, and instcombine should eliminate the udiv
; The rhs is a splat of the constant 2, built with insertelement into poison
; followed by a zero-mask shufflevector. The expectations below require that
; no udiv text appears after the function definition line.
; CHECK-LABEL: define <2 x i32> @udiv_pow2_fixed(<2 x i32> %lhs)
; CHECK-NOT: udiv
define <2 x i32> @udiv_pow2_fixed(<2 x i32> %lhs) {
%splatter = insertelement <2 x i32> poison, i32 2, i32 0
%rhs = shufflevector <2 x i32> %splatter,
<2 x i32> undef,
<2 x i32> zeroinitializer
%res = udiv <2 x i32> %lhs, %rhs
ret <2 x i32> %res
}

View File

@ -0,0 +1,850 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Builds the vector <%f * %f, 0.0, 0.0, 0.0>, bitcasts it to <4 x i32> and
; extracts lane 0. Only lane 0 is read, so the expected output is a scalar
; bitcast of the fmul result with no vector instructions left.
define i32 @test2(float %f) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[T5:%.*]] = fmul float [[F:%.*]], [[F]]
; CHECK-NEXT: [[T21:%.*]] = bitcast float [[T5]] to i32
; CHECK-NEXT: ret i32 [[T21]]
;
%t5 = fmul float %f, %f
%t9 = insertelement <4 x float> poison, float %t5, i32 0
%t10 = insertelement <4 x float> %t9, float 0.000000e+00, i32 1
%t11 = insertelement <4 x float> %t10, float 0.000000e+00, i32 2
%t12 = insertelement <4 x float> %t11, float 0.000000e+00, i32 3
%t19 = bitcast <4 x float> %t12 to <4 x i32>
%t21 = extractelement <4 x i32> %t19, i32 0
ret i32 %t21
}
; The truncated fgetc result is inserted into lane 1 of an all-zero vector,
; but lane 0 is the one extracted and compared against 80. Lane 0 is still the
; constant 0, so the expected output keeps the call and folds the branch
; condition to false.
define void @get_image() nounwind {
; CHECK-LABEL: @get_image(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) [[ATTR0:#.*]]
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: unreachable
;
entry:
%0 = call i32 @fgetc(i8* null) nounwind
%1 = trunc i32 %0 to i8
%t2 = insertelement <100 x i8> zeroinitializer, i8 %1, i32 1
%t1 = extractelement <100 x i8> %t2, i32 0
%2 = icmp eq i8 %t1, 80
br i1 %2, label %bb2, label %bb3
bb2: ; preds = %entry
br label %bb3
bb3: ; preds = %bb2, %entry
unreachable
}
; PR4340
; All four lanes of the loaded vector are overwritten with 0.0 before the
; store, so the expected output drops the load and the insert chain and stores
; zeroinitializer directly.
define void @vac(<4 x float>* nocapture %a) nounwind {
; CHECK-LABEL: @vac(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT: ret void
;
entry:
%t1 = load <4 x float>, <4 x float>* %a ; <<4 x float>> [#uses=1]
%vecins = insertelement <4 x float> %t1, float 0.000000e+00, i32 0 ; <<4 x float>> [#uses=1]
%vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
%vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
%vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
store <4 x float> %vecins8, <4 x float>* %a
ret void
}
; External callee used by @get_image above.
declare i32 @fgetc(i8*)
; %shuffle.i widens %y to <y0, y1, y0, y1>, but the second shuffle only reads
; its lanes 0 and 1 (mask indices 4 and 5); lanes 2 and 3 of the result come
; from %x. The expected output therefore leaves the two unused lanes of the
; first shuffle undef and uses a single-source widening of %y.
define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
; CHECK-LABEL: @dead_shuffle_elt(
; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]]
;
%shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%shuffle9.i = shufflevector <4 x float> %x, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x float> %shuffle9.i
}
; Builds <%f, 0.0, 0.0, 0.0> as <4 x double>, truncates to <4 x float>, then
; keeps only lanes 0 and 1. The expected output performs the insert and the
; fptrunc on 2-element vectors instead.
define <2 x float> @test_fptrunc(double %f) {
; CHECK-LABEL: @test_fptrunc(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[F:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
%t9 = insertelement <4 x double> poison, double %f, i32 0
%t10 = insertelement <4 x double> %t9, double 0.000000e+00, i32 1
%t11 = insertelement <4 x double> %t10, double 0.000000e+00, i32 2
%t12 = insertelement <4 x double> %t11, double 0.000000e+00, i32 3
%t5 = fptrunc <4 x double> %t12 to <4 x float>
%ret = shufflevector <4 x float> %t5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %ret
}
; Same shape as @test_fptrunc but with fpext: <%f, 0.0, 0.0, 0.0> is extended
; from <4 x float> to <4 x double> and only lanes 0 and 1 are kept. The
; expected output performs the insert and the fpext on 2-element vectors.
define <2 x double> @test_fpext(float %f) {
; CHECK-LABEL: @test_fpext(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float [[F:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%t9 = insertelement <4 x float> poison, float %f, i32 0
%t10 = insertelement <4 x float> %t9, float 0.000000e+00, i32 1
%t11 = insertelement <4 x float> %t10, float 0.000000e+00, i32 2
%t12 = insertelement <4 x float> %t11, float 0.000000e+00, i32 3
%t5 = fpext <4 x float> %t12 to <4 x double>
%ret = shufflevector <4 x double> %t5, <4 x double> undef, <2 x i32> <i32 0, i32 1>
ret <2 x double> %ret
}
; The mask keeps lanes 0-2 of %f and takes lane 3 from element 1 of the
; constant operand (mask index 5, value 1.0). The expected output is a single
; insertelement of 1.0 at index 3 of %f.
define <4 x double> @test_shuffle(<4 x double> %f) {
; CHECK-LABEL: @test_shuffle(
; CHECK-NEXT: [[RET1:%.*]] = insertelement <4 x double> [[F:%.*]], double 1.000000e+00, i32 3
; CHECK-NEXT: ret <4 x double> [[RET1]]
;
%ret = shufflevector <4 x double> %f, <4 x double> <double undef, double 1.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
ret <4 x double> %ret
}
; Select with the constant condition <true, false, false, true> between two
; fully built vectors: lanes 0 and 3 come from %a3, lanes 1 and 2 from %b3.
; In the expected output the select becomes a shuffle with mask <0, 5, 6, 3>,
; the constants in unselected lanes are undef, and %g (only in an unselected
; lane) is no longer used.
define <4 x float> @test_select(float %f, float %g) {
; CHECK-LABEL: @test_select(
; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float [[F:%.*]], i32 0
; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[RET]]
;
%a0 = insertelement <4 x float> poison, float %f, i32 0
%a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
%a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
%a3 = insertelement <4 x float> %a2, float 3.000000e+00, i32 3
%b0 = insertelement <4 x float> poison, float %g, i32 0
%b1 = insertelement <4 x float> %b0, float 4.000000e+00, i32 1
%b2 = insertelement <4 x float> %b1, float 5.000000e+00, i32 2
%b3 = insertelement <4 x float> %b2, float 6.000000e+00, i32 3
%ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %b3
ret <4 x float> %ret
}
; Check that instcombine doesn't wrongly fold away the select completely.
; Lane 0 of the condition is a constant expression (an icmp on an element
; extracted from a bitcast constant vector); lane 1 is the literal true.
; The expected output keeps lane 1 of %v and writes 0 into lane 0.
define <2 x i64> @PR24922(<2 x i64> %v) {
; CHECK-LABEL: @PR24922(
; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x i64> [[V:%.*]], i64 0, i32 0
; CHECK-NEXT: ret <2 x i64> [[RESULT1]]
;
%result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
ret <2 x i64> %result
}
; The shuffle only demands the 0th element of 'out123', which is never
; inserted (it is still the poison placeholder), so everything should fold away.
define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
; CHECK-LABEL: @inselt_shuf_no_demand(
; CHECK-NEXT: ret <4 x float> undef
;
%out1 = insertelement <4 x float> poison, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 2
%out123 = insertelement <4 x float> %out12, float %a3, i32 3
%shuffle = shufflevector <4 x float> %out123, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
ret <4 x float> %shuffle
}
; The shuffle only demands the 0th element of 'out123', which is never
; inserted (it is still the poison placeholder), so everything should fold away.
; Commuted form: 'out123' is the second shuffle operand, so its lane 0 is
; selected by mask index 4.
define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
; CHECK-LABEL: @inselt_shuf_no_demand_commute(
; CHECK-NEXT: ret <4 x float> undef
;
%out1 = insertelement <4 x float> poison, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 2
%out123 = insertelement <4 x float> %out12, float %a3, i32 3
%shuffle = shufflevector <4 x float> undef, <4 x float> %out123, <4 x i32> <i32 4, i32 undef, i32 undef, i32 undef>
ret <4 x float> %shuffle
}
; The add uses 'out012' giving it multiple uses after the shuffle is transformed to also
; use 'out012'. The analysis should be able to see past that.
; The final shuffle reads only lanes 0 and 1, so the expected output drops the
; inserts at indices 2 and 3 and adds %b to 'out01' directly.
define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) {
; CHECK-LABEL: @inselt_shuf_no_demand_multiuse(
; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
;
%out0 = insertelement <4 x i32> poison, i32 %a0, i32 0
%out01 = insertelement <4 x i32> %out0, i32 %a1, i32 1
%out012 = insertelement <4 x i32> %out01, i32 %a0, i32 2
%foo = add <4 x i32> %out012, %b
%out0123 = insertelement <4 x i32> %foo, i32 %a1, i32 3
%shuffle = shufflevector <4 x i32> %out0123, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x i32> %shuffle
}
; Variant of @inselt_shuf_no_demand where the middle insert uses a variable
; index. The expected output drops the constant-index inserts at 1 and 3 but
; keeps the variable-index insert and the shuffle, since the variable index
; may write the demanded lane 0.
define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) {
; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain(
; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> poison, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SHUFFLE]]
;
%out1 = insertelement <4 x float> poison, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 %variable_index ; something unexpected
%out123 = insertelement <4 x float> %out12, float %a3, i32 3
%shuffle = shufflevector <4 x float> %out123, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
ret <4 x float> %shuffle
}
; Test undef replacement in constant vector elements with binops.
; add: the shuffle mask <1, undef, 2> never reads lane 0 of %bo, so the
; expected output has undef in lane 0 of the constant and no nsw flag.
define <3 x i8> @shuf_add(<3 x i8> %x) {
; CHECK-LABEL: @shuf_add(
; CHECK-NEXT: [[BO:%.*]] = add <3 x i8> [[X:%.*]], <i8 undef, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = add nsw <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
ret <3 x i8> %r
}
; sub with the constant as operand 0: the mask <0, undef, 2> never reads lane 1
; of %bo, so the expected output has undef in lane 1 of the constant and no
; nuw flag.
define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
; CHECK-NEXT: [[BO:%.*]] = sub <3 x i8> <i8 1, i8 undef, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = sub nuw <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 2>
ret <3 x i8> %r
}
; mul: the mask <0, 2, 0> never reads lane 1 of %bo, so the expected output
; has undef in lane 1 of the constant and no nsw flag.
define <3 x i8> @shuf_mul(<3 x i8> %x) {
; CHECK-LABEL: @shuf_mul(
; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = mul nsw <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
ret <3 x i8> %r
}
; and: the mask <1, 1, 0> never reads lane 2 of %bo, so the expected output
; has undef in lane 2 of the constant.
define <3 x i8> @shuf_and(<3 x i8> %x) {
; CHECK-LABEL: @shuf_and(
; CHECK-NEXT: [[BO:%.*]] = and <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = and <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
ret <3 x i8> %r
}
; or: the mask <1, undef, 0> never reads lane 2 of %bo, so the expected output
; has undef in lane 2 of the constant.
define <3 x i8> @shuf_or(<3 x i8> %x) {
; CHECK-LABEL: @shuf_or(
; CHECK-NEXT: [[BO:%.*]] = or <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = or <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
ret <3 x i8> %r
}
; xor: the mask <2, undef, 0> never reads lane 1 of %bo, so the expected output
; has undef in lane 1 of the constant.
define <3 x i8> @shuf_xor(<3 x i8> %x) {
; CHECK-LABEL: @shuf_xor(
; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = xor <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
ret <3 x i8> %r
}
; lshr with the constant as the shifted value: lane 0 of %bo is not read by the
; mask <2, 1, undef>, yet the expected output leaves the constant fully
; defined (no undef replacement for shifts in these expectations).
define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_lshr_const_op0(
; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = lshr <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
ret <3 x i8> %r
}
; lshr exact with the constant as the shift amount: lane 0 of %bo is not read
; by the mask <2, 1, undef>, yet the expected output keeps the instruction,
; its exact flag, and the constant unchanged.
define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_lshr_const_op1(
; CHECK-NEXT: [[BO:%.*]] = lshr exact <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = lshr exact <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
ret <3 x i8> %r
}
; ashr with the constant as the shifted value: the expected output rewrites
; the ashr as lshr and keeps the constant fully defined, even though lane 2 of
; %bo is not read by the mask <0, undef, 1>.
define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_ashr_const_op0(
; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = ashr <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
ret <3 x i8> %r
}
; ashr exact with the constant as the shift amount: lane 2 of %bo is not read
; by the mask <0, undef, 1>, yet the expected output keeps the instruction,
; its exact flag, and the constant unchanged.
define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_ashr_const_op1(
; CHECK-NEXT: [[BO:%.*]] = ashr exact <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = ashr exact <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
ret <3 x i8> %r
}
; shl nsw with the constant as the shifted value: lane 1 of %bo is not read by
; the mask <2, undef, 0>, yet the expected output keeps the instruction, its
; nsw flag, and the constant unchanged.
define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_shl_const_op0(
; CHECK-NEXT: [[BO:%.*]] = shl nsw <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = shl nsw <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
ret <3 x i8> %r
}
; shl nuw with the constant as the shift amount: lane 1 of %bo is not read by
; the mask <2, undef, 0>, yet the expected output keeps the instruction, its
; nuw flag, and the constant unchanged.
define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_shl_const_op1(
; CHECK-NEXT: [[BO:%.*]] = shl nuw <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = shl nuw <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
ret <3 x i8> %r
}
; sdiv exact with the constant as the dividend: lane 2 of %bo is not read by
; the mask <0, undef, 1>, yet the expected output keeps the instruction, its
; exact flag, and the constant unchanged.
define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sdiv_const_op0(
; CHECK-NEXT: [[BO:%.*]] = sdiv exact <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = sdiv exact <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
ret <3 x i8> %r
}
; sdiv with the constant as the divisor: lane 2 of %bo is not read by the mask
; <1, undef, 0>, yet the expected output keeps the divisor constant fully
; defined.
define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sdiv_const_op1(
; CHECK-NEXT: [[BO:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = sdiv <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
ret <3 x i8> %r
}
; srem with the constant as the dividend: lane 0 of %bo is not read by the mask
; <1, undef, 2>, yet the expected output keeps the constant fully defined.
define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_srem_const_op0(
; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = srem <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
ret <3 x i8> %r
}
; srem with the constant as the divisor: lane 0 of %bo is not read by the mask
; <2, undef, 1>, yet the expected output keeps the divisor constant fully
; defined.
define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_srem_const_op1(
; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 1>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = srem <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 1>
ret <3 x i8> %r
}
; udiv exact with the constant as the dividend: lane 1 of %bo is not read by
; the mask <2, undef, 0>, yet the expected output keeps the instruction, its
; exact flag, and the constant unchanged.
define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_udiv_const_op0(
; CHECK-NEXT: [[BO:%.*]] = udiv exact <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = udiv exact <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
ret <3 x i8> %r
}
; udiv with the constant as the divisor: lane 1 of %bo is not read by the mask
; <2, undef, 0>, yet the expected output keeps the divisor constant fully
; defined.
define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_udiv_const_op1(
; CHECK-NEXT: [[BO:%.*]] = udiv <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = udiv <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
ret <3 x i8> %r
}
; urem with the constant as the dividend: lane 0 of %bo is not read by the mask
; <2, 1, undef>, yet the expected output keeps the constant fully defined.
define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) {
; CHECK-LABEL: @shuf_urem_const_op0(
; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = urem <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
ret <3 x i8> %r
}
; urem with the constant as the divisor: lane 2 of %bo is not read by the mask
; <undef, 1, 0>, yet the expected output keeps the divisor constant fully
; defined.
define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) {
; CHECK-LABEL: @shuf_urem_const_op1(
; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 undef, i32 1, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = urem <3 x i8> %x, <i8 1, i8 2, i8 3>
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 undef, i32 1, i32 0>
ret <3 x i8> %r
}
; fadd: the mask <undef, 1, 0> never reads lane 2 of %bo, so the expected
; output has undef in lane 2 of the constant.
define <3 x float> @shuf_fadd(<3 x float> %x) {
; CHECK-LABEL: @shuf_fadd(
; CHECK-NEXT: [[BO:%.*]] = fadd <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = fadd <3 x float> %x, <float 1.0, float 2.0, float 3.0>
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
ret <3 x float> %r
}
; fsub fast with the constant as operand 0: the mask <undef, 0, 2> never reads
; lane 1 of %bo, so the expected output has undef in lane 1 of the constant.
; The fast flag is kept.
define <3 x float> @shuf_fsub(<3 x float> %x) {
; CHECK-LABEL: @shuf_fsub(
; CHECK-NEXT: [[BO:%.*]] = fsub fast <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = fsub fast <3 x float> <float 1.0, float 2.0, float 3.0>, %x
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
ret <3 x float> %r
}
; fmul reassoc: the mask <undef, 1, 0> never reads lane 2 of %bo, so the
; expected output has undef in lane 2 of the constant. The reassoc flag is kept.
define <3 x float> @shuf_fmul(<3 x float> %x) {
; CHECK-LABEL: @shuf_fmul(
; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = fmul reassoc <3 x float> %x, <float 1.0, float 2.0, float 3.0>
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
ret <3 x float> %r
}
; fdiv with the constant as the dividend: the mask <undef, 0, 2> never reads
; lane 1 of %bo, so the expected output has undef in lane 1 of the constant.
; Both fast-math flags are kept (printed as "reassoc ninf").
define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) {
; CHECK-LABEL: @shuf_fdiv_const_op0(
; CHECK-NEXT: [[BO:%.*]] = fdiv reassoc ninf <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = fdiv ninf reassoc <3 x float> <float 1.0, float 2.0, float 3.0>, %x
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
ret <3 x float> %r
}
; fdiv with the constant as the divisor: the mask <undef, 1, 0> never reads
; lane 2 of %bo, so the expected output has undef in lane 2 of the constant.
; Both fast-math flags are kept (printed as "nnan ninf").
define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) {
; CHECK-LABEL: @shuf_fdiv_const_op1(
; CHECK-NEXT: [[BO:%.*]] = fdiv nnan ninf <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = fdiv ninf nnan <3 x float> %x, <float 1.0, float 2.0, float 3.0>
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
ret <3 x float> %r
}
; frem nnan with the constant as the dividend: the mask <undef, 2, 0> never
; reads lane 1 of %bo, so the expected output has undef in lane 1 of the
; constant. The nnan flag is kept.
define <3 x float> @shuf_frem_const_op0(<3 x float> %x) {
; CHECK-LABEL: @shuf_frem_const_op0(
; CHECK-NEXT: [[BO:%.*]] = frem nnan <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 2, i32 0>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = frem nnan <3 x float> <float 1.0, float 2.0, float 3.0>, %x
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 2, i32 0>
ret <3 x float> %r
}
; frem with the constant as the divisor: the mask <1, undef, 2> never reads
; lane 0 of %bo, so the expected output has undef in lane 0 of the constant.
; Both fast-math flags are kept (printed as "reassoc ninf").
define <3 x float> @shuf_frem_const_op1(<3 x float> %x) {
; CHECK-LABEL: @shuf_frem_const_op1(
; CHECK-NEXT: [[BO:%.*]] = frem reassoc ninf <3 x float> [[X:%.*]], <float undef, float 2.000000e+00, float 3.000000e+00>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 1, i32 undef, i32 2>
; CHECK-NEXT: ret <3 x float> [[R]]
;
%bo = frem ninf reassoc <3 x float> %x, <float 1.0, float 2.0, float 3.0>
%r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 1, i32 undef, i32 2>
ret <3 x float> %r
}
;; TODO: getelementptr tests below show missing simplifications for
;; vector demanded elements on vector geps.
; Vector base with a scalar index; only lane 1 of the result is extracted.
; The expected output is identical to the input (the gep stays a vector gep).
define i32* @gep_vbase_w_s_idx(<2 x i32*> %base) {
; CHECK-LABEL: @gep_vbase_w_s_idx(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 1
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%gep = getelementptr i32, <2 x i32*> %base, i64 1
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Splatted scalar base with a scalar index; only lane 1 of the result is
; extracted. In the expected output the splat (insert + shuffle) collapses to
; a single insertelement of %base at lane 1; the gep itself stays a vector gep.
define i32* @gep_splat_base_w_s_idx(i32* %base) {
; CHECK-LABEL: @gep_splat_base_w_s_idx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0
%basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
%gep = getelementptr i32, <2 x i32*> %basevec2, i64 1
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Splatted scalar base with a constant vector index; only lane 1 is extracted.
; In the expected output the base splat collapses to an insertelement at lane 1
; and the unused lane 0 of the index constant becomes undef.
define i32* @gep_splat_base_w_cv_idx(i32* %base) {
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> <i64 undef, i64 1>
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0
%basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
%gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> <i64 0, i64 1>
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Splatted scalar base with a variable vector index; only lane 1 is extracted.
; In the expected output the base splat collapses to an insertelement at lane 1
; and %idxvec is used unchanged.
define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) {
; CHECK-LABEL: @gep_splat_base_w_vidx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0
%basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
%gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> %idxvec
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Internal i32 global used as the constant pointer base in the gep_cvbase_*
; tests below.
@GLOBAL = internal global i32 zeroinitializer
; Constant vector base <@GLOBAL, @GLOBAL> with a scalar variable index; only
; lane 1 is extracted. In the expected output the unused lane 0 of the base
; constant becomes undef. The %base parameter is not used by the body.
define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
; CHECK-LABEL: @gep_cvbase_w_s_idx(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* undef, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, i64 %raw_addr
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Constant vector base and constant vector index; lane 1 is extracted. Every
; operand is constant, so the expected output is a single scalar constant
; expression: @GLOBAL offset by 1. Neither parameter is used by the body.
define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) {
; CHECK-LABEL: @gep_cvbase_w_cv_idx(
; CHECK-NEXT: ret i32* getelementptr inbounds (i32, i32* @GLOBAL, i64 1)
;
%gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, <2 x i64> <i64 0, i64 1>
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Scalar base with a constant vector index; only lane 1 is extracted. In the
; expected output the unused lane 0 of the index constant becomes undef.
define i32* @gep_sbase_w_cv_idx(i32* %base) {
; CHECK-LABEL: @gep_sbase_w_cv_idx(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 undef, i64 1>
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%gep = getelementptr i32, i32* %base, <2 x i64> <i64 0, i64 1>
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Scalar base with a splatted scalar index; only lane 1 is extracted. In the
; expected output the index splat collapses to a single insertelement of %idx
; at lane 1.
define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) {
; CHECK-LABEL: @gep_sbase_w_splat_idx(
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0
%idxvec2 = shufflevector <2 x i64> %idxvec1, <2 x i64> undef, <2 x i32> zeroinitializer
%gep = getelementptr i32, i32* %base, <2 x i64> %idxvec2
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; Both the base and the index are splats of scalars; only lane 1 is extracted.
; In the expected output each splat collapses to a single insertelement at
; lane 1.
define i32* @gep_splat_both(i32* %base, i64 %idx) {
; CHECK-LABEL: @gep_splat_both(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
; CHECK-NEXT: ret i32* [[EE]]
;
%basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0
%basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
%idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0
%idxvec2 = shufflevector <2 x i64> %idxvec1, <2 x i64> undef, <2 x i32> zeroinitializer
%gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> %idxvec2
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
; The base vector only has lane 0 defined and the index vector only has lane 1
; defined, so each lane of the gep has one poison operand. The expected output
; returns an undef vector.
define <2 x i32*> @gep_all_lanes_undef(i32* %base, i64 %idx) {
; CHECK-LABEL: @gep_all_lanes_undef(
; CHECK-NEXT: ret <2 x i32*> undef
;
%basevec = insertelement <2 x i32*> poison, i32* %base, i32 0
%idxvec = insertelement <2 x i64> poison, i64 %idx, i32 1
%gep = getelementptr i32, <2 x i32*> %basevec, <2 x i64> %idxvec
ret <2 x i32*> %gep
}
; Only lane 1 of the gep is extracted, and lane 1 of the base vector was never
; inserted (it is still the poison placeholder). The expected output returns
; undef.
define i32* @gep_demanded_lane_undef(i32* %base, i64 %idx) {
; CHECK-LABEL: @gep_demanded_lane_undef(
; CHECK-NEXT: ret i32* undef
;
%basevec = insertelement <2 x i32*> poison, i32* %base, i32 0
%idxvec = insertelement <2 x i64> poison, i64 %idx, i32 1
%gep = getelementptr i32, <2 x i32*> %basevec, <2 x i64> %idxvec
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}
;; LangRef has an odd quirk around FCAs which make it illegal to use undef
;; indices.
; Only lane 0 of the gep is extracted, but the expected output is identical to
; the input: neither constant index vector gets an undef lane.
define i32* @PR41624(<2 x { i32, i32 }*> %a) {
; CHECK-LABEL: @PR41624(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x { i32, i32 }*> [[A:%.*]], <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i32*> [[W]], i32 0
; CHECK-NEXT: ret i32* [[R]]
;
%w = getelementptr { i32, i32 }, <2 x { i32, i32 }*> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
%r = extractelement <2 x i32*> %w, i32 0
ret i32* %r
}
@global = external global [0 x i32], align 4
; Make sure we don't get stuck in a loop turning the zeroinitializer into
; <0, undef, undef, undef> and then changing it back.
; Only lane 0 of the GEP is extracted; in the expected output the unused lanes
; of the constant base vector and of the first index vector become undef, while
; the variable index vector %arg is passed through unchanged.
define i32* @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
; CHECK-LABEL: @zero_sized_type_extract(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x [0 x i32]*> <[0 x i32]* @global, [0 x i32]* undef, [0 x i32]* undef, [0 x i32]* undef>, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i64> [[ARG:%.*]]
; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x i32*> [[T]], i64 0
; CHECK-NEXT: ret i32* [[T2]]
;
bb:
%t = getelementptr inbounds [0 x i32], <4 x [0 x i32]*> <[0 x i32]* @global, [0 x i32]* @global, [0 x i32]* @global, [0 x i32]* @global>, <4 x i64> zeroinitializer, <4 x i64> %arg
%t2 = extractelement <4 x i32*> %t, i64 0
ret i32* %t2
}
; The elements of the result other than element 0 are always taken from 'y'
; (the true value is <x[0], y[1], y[2], y[3]> and the false value is 'y'),
; so splatting cmp[0] across the condition is unnecessary.
; The expected output selects with the original %cmp and then blends lane 0 of
; that select with lanes 1-3 of %y.
define <4 x i8> @select_cond_with_eq_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @select_cond_with_eq_true_false_elts(
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[SEL]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
%tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
%splat = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> zeroinitializer
%r = select <4 x i1> %splat, <4 x i8> %tval, <4 x i8> %y
ret <4 x i8> %r
}
; First element of the result is always x[0], so first element of select condition is unnecessary.
; (The true value is <x[0], y[1], y[2], y[3]> and the false value is 'x'.)
; In the expected output, element 0 of the condition shuffle mask becomes undef.
define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @select_cond_with_eq_true_false_elts2(
; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[SEL]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
%tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
%cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%r = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %x
ret <4 x i8> %r
}
; Second element of the result is always x[3], so second element of select condition is unnecessary.
; Fourth element of the result is always undef, so fourth element of select condition is unnecessary.
; (True value is <x[1], x[3], y[1], undef>; false value is <y[0], x[3], x[2], undef>.)
; The expected output is currently identical to the input: all three shuffles
; and the select are kept with their original masks.
define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x float> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @select_cond_with_eq_true_false_elts3(
; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 3, i32 5, i32 undef>
; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> <i32 0, i32 7, i32 6, i32 undef>
; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[TVAL]], <4 x float> [[FVAL]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%tval = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 undef>
%fval = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 0, i32 7, i32 6, i32 undef>
%cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
%r = select <4 x i1> %cond, <4 x float> %tval, <4 x float> %fval
ret <4 x float> %r
}
; Element 0 of the true value is undef (undef mask element) while the false
; value is all of 'x'. The expected output keeps the condition shuffle mask
; unchanged and only rewrites the true value as a single-source shuffle of %y.
define <4 x i8> @select_cond_with_undef_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) {
; CHECK-LABEL: @select_cond_with_undef_true_false_elts(
; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[TVAL]], <4 x i8> [[X:%.*]]
; CHECK-NEXT: ret <4 x i8> [[R]]
;
%tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 undef, i32 5, i32 6, i32 7>
%cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%r = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %x
ret <4 x i8> %r
}
; The insert can be safely eliminated because the shuffle blocks poison from cmp[0].
; Lane 0 of both select operands is x[0], so the value inserted into lane 0 of
; the condition does not affect the result. The expected output selects on the
; original %cmp and takes lane 0 of the final result directly from %x.
define <4 x i8> @select_cond_(<4 x i8> %x, <4 x i8> %min, <4 x i1> %cmp, i1 %poison_blocker) {
; CHECK-LABEL: @select_cond_(
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[MIN:%.*]], <4 x i8> [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[SEL]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
%ins = insertelement <4 x i1> %cmp, i1 %poison_blocker, i32 0
%vecins = shufflevector <4 x i8> %x, <4 x i8> %min, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
%r = select <4 x i1> %ins, <4 x i8> %vecins, <4 x i8> %x
ret <4 x i8> %r
}
; Element 0 of %x is extracted and re-inserted at index 0 of a poison vector,
; then lanes 1, 2 and 3 are all overwritten with %y. Every lane of the result
; is defined, so the expected output inserts %y directly into %x and drops the
; extract/insert round trip.
define <4 x float> @ins_of_ext(<4 x float> %x, float %y) {
; CHECK-LABEL: @ins_of_ext(
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[X:%.*]], float [[Y:%.*]], i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[Y]], i32 2
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[Y]], i32 3
; CHECK-NEXT: ret <4 x float> [[I3]]
;
%e0 = extractelement <4 x float> %x, i32 0
%i0 = insertelement <4 x float> poison, float %e0, i32 0
%i1 = insertelement <4 x float> %i0, float %y, i32 1
%i2 = insertelement <4 x float> %i1, float %y, i32 2
%i3 = insertelement <4 x float> %i2, float %y, i32 3
ret <4 x float> %i3
}
; Elements 0 and 1 of %x are each extracted and re-inserted at the same index,
; and lanes 2 and 3 are overwritten with %y. The expected output drops both
; extract/insert pairs and inserts %y into %x at lanes 2 and 3 only.
define <4 x float> @ins_of_ext_twice(<4 x float> %x, float %y) {
; CHECK-LABEL: @ins_of_ext_twice(
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[X:%.*]], float [[Y:%.*]], i32 2
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[Y]], i32 3
; CHECK-NEXT: ret <4 x float> [[I3]]
;
%e0 = extractelement <4 x float> %x, i32 0
%i0 = insertelement <4 x float> poison, float %e0, i32 0
%e1 = extractelement <4 x float> %x, i32 1
%i1 = insertelement <4 x float> %i0, float %e1, i32 1
%i2 = insertelement <4 x float> %i1, float %y, i32 2
%i3 = insertelement <4 x float> %i2, float %y, i32 3
ret <4 x float> %i3
}
; Negative test - element 3 of the result must be undef to be poison safe.
; Lane 3 is never written here (in this copy of the test the placeholder
; vector is poison), so %i0 cannot simply be replaced with %x: that would put
; x[3] in lane 3 of the result. The expected output is identical to the input.
; TODO: Could convert insert/extract to identity shuffle with undef mask elements.
define <4 x float> @ins_of_ext_wrong_demand(<4 x float> %x, float %y) {
; CHECK-LABEL: @ins_of_ext_wrong_demand(
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[E0]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[Y:%.*]], i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[Y]], i32 2
; CHECK-NEXT: ret <4 x float> [[I2]]
;
%e0 = extractelement <4 x float> %x, i32 0
%i0 = insertelement <4 x float> poison, float %e0, i32 0
%i1 = insertelement <4 x float> %i0, float %y, i32 1
%i2 = insertelement <4 x float> %i1, float %y, i32 2
ret <4 x float> %i2
}
; Negative test - can't replace i0 with x.
; The source vector %x is <5 x float> but the result is <4 x float>, so the
; types do not match. The expected output is identical to the input.
; TODO: Could convert insert/extract to identity shuffle with undef mask elements.
define <4 x float> @ins_of_ext_wrong_type(<5 x float> %x, float %y) {
; CHECK-LABEL: @ins_of_ext_wrong_type(
; CHECK-NEXT: [[E0:%.*]] = extractelement <5 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[E0]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[Y:%.*]], i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[Y]], i32 2
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[Y]], i32 3
; CHECK-NEXT: ret <4 x float> [[I3]]
;
%e0 = extractelement <5 x float> %x, i32 0
%i0 = insertelement <4 x float> poison, float %e0, i32 0
%i1 = insertelement <4 x float> %i0, float %y, i32 1
%i2 = insertelement <4 x float> %i1, float %y, i32 2
%i3 = insertelement <4 x float> %i2, float %y, i32 3
ret <4 x float> %i3
}
; This should reduce, but the shuffle mask must remain as-is (no extra undef).
; The final shuffle only reads lanes 0 and 2 of %t2, both of which are defined,
; so the expected output inserts %x directly into %v and keeps the mask
; <0, 6, 2, 7> unchanged.
define <4 x i4> @ins_of_ext_undef_elts_propagation(<4 x i4> %v, <4 x i4> %v2, i4 %x) {
; CHECK-LABEL: @ins_of_ext_undef_elts_propagation(
; CHECK-NEXT: [[T2:%.*]] = insertelement <4 x i4> [[V:%.*]], i4 [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i4> [[T2]], <4 x i4> [[V2:%.*]], <4 x i32> <i32 0, i32 6, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i4> [[R]]
;
%v0 = extractelement <4 x i4> %v, i32 0
%t0 = insertelement <4 x i4> poison, i4 %v0, i32 0
%t2 = insertelement <4 x i4> %t0, i4 %x, i32 2
%r = shufflevector <4 x i4> %t2, <4 x i4> %v2, <4 x i32> <i32 0, i32 6, i32 2, i32 7>
ret <4 x i4> %r
}
; Variant of the insert-of-extract reduction with more ops/uses, to verify
; things work in more complicated cases: two extract/insert round trips feed an
; insert of %x, followed by two shuffles. The expected output inserts %x
; directly into %v and keeps both shuffle masks exactly as written.
define <8 x i4> @ins_of_ext_undef_elts_propagation2(<8 x i4> %v, <8 x i4> %v2, i4 %x) {
; CHECK-LABEL: @ins_of_ext_undef_elts_propagation2(
; CHECK-NEXT: [[I19:%.*]] = insertelement <8 x i4> [[V:%.*]], i4 [[X:%.*]], i32 2
; CHECK-NEXT: [[I20:%.*]] = shufflevector <8 x i4> [[I19]], <8 x i4> [[V2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 10, i32 9, i32 8, i32 undef>
; CHECK-NEXT: [[I21:%.*]] = shufflevector <8 x i4> [[I20]], <8 x i4> [[V]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT: ret <8 x i4> [[I21]]
;
%i15 = extractelement <8 x i4> %v, i32 0
%i16 = insertelement <8 x i4> poison, i4 %i15, i32 0
%i17 = extractelement <8 x i4> %v, i32 1
%i18 = insertelement <8 x i4> %i16, i4 %i17, i32 1
%i19 = insertelement <8 x i4> %i18, i4 %x, i32 2
%i20 = shufflevector <8 x i4> %i19, <8 x i4> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 10, i32 9, i32 8, i32 undef>
%i21 = shufflevector <8 x i4> %i20, <8 x i4> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
ret <8 x i4> %i21
}

View File

@ -0,0 +1,26 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; Extracts one element of an fptosi'd vector using a variable index (%5,
; computed from %b), then inserts it into lane 7 of a poison vector.
; The assertions only require that the output contains an extractelement
; followed later by a fptosi.
define void @test (float %b, <8 x float> * %p) {
; CHECK: extractelement
; CHECK: fptosi
%1 = load <8 x float> , <8 x float> * %p
%2 = bitcast <8 x float> %1 to <8 x i32>
%3 = bitcast <8 x i32> %2 to <8 x float>
%a = fptosi <8 x float> %3 to <8 x i32>
%4 = fptosi float %b to i32
%5 = add i32 %4, -2
%6 = extractelement <8 x i32> %a, i32 %5
%7 = insertelement <8 x i32> poison, i32 %6, i32 7
%8 = sitofp <8 x i32> %7 to <8 x float>
store <8 x float> %8, <8 x float>* %p
ret void
}
; PR18600
; Variable-index extractelement from a bitcast constant expression
; (<2 x i64> viewed as <4 x i32>). The assertions require that an
; extractelement is still present in the output.
define i32 @test2(i32 %i) {
%e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
ret i32 %e
; CHECK-LABEL: @test2
; CHECK: extractelement
}

View File

@ -0,0 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; Regression test named after PR41270. The input splats %x, applies a vector
; GEP with constant indices (0, 3), extracts lane 3 of the result and inserts
; it into lane 0 of a poison vector. The expected output inserts %x into
; lane 0 and applies the GEP (with i64 indices) to that vector directly.
define <4 x i16*> @PR41270([4 x i16]* %x) {
; CHECK-LABEL: @PR41270(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3
; CHECK-NEXT: ret <4 x i16*> [[TMP2]]
;
%ins = insertelement <4 x [4 x i16]*> poison, [4 x i16]* %x, i32 0
%splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> undef, <4 x i32> zeroinitializer
%t2 = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> %splat, i32 0, i32 3
%t3 = extractelement <4 x i16*> %t2, i32 3
%ins2 = insertelement <4 x i16*> poison, i16* %t3, i32 0
ret <4 x i16*> %ins2
}

View File

@ -0,0 +1,107 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; A <16 x i32> induction vector is carried through a phi, but the loop only
; ever reads element 0 of it. The assertions expect the entry value to be
; scalarized to a trunc of %val and the loop phi to become an i32 phi.
define void @f(i64 %val, i32 %limit, i32 *%ptr) {
; CHECK-LABEL: @f
; CHECK: %0 = trunc i64 %val to i32
; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
entry:
%tempvector = insertelement <16 x i64> poison, i64 %val, i32 0
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
%0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%1 = trunc <16 x i64> %0 to <16 x i32>
br label %loop
loop:
%2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
%elt = extractelement <16 x i32> %2, i32 0
%end = icmp ult i32 %elt, %limit
%3 = add i32 10, %elt
%4 = sext i32 %elt to i64
%5 = getelementptr i32, i32* %ptr, i64 %4
store i32 %3, i32* %5
%inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
br i1 %end, label %loop, label %ret
ret:
ret void
}
; The vector phi has two users, %elt and %eltcopy, but both extract the same
; element 0. The assertions still expect the phi to be scalarized to an i32
; phi fed by a trunc of %val.
define void @copy(i64 %val, i32 %limit, i32 *%ptr) {
; CHECK-LABEL: @copy
; CHECK: %0 = trunc i64 %val to i32
; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
entry:
%tempvector = insertelement <16 x i64> poison, i64 %val, i32 0
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
%0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%1 = trunc <16 x i64> %0 to <16 x i32>
br label %loop
loop:
%2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
%elt = extractelement <16 x i32> %2, i32 0
%eltcopy = extractelement <16 x i32> %2, i32 0
%end = icmp ult i32 %elt, %limit
%3 = add i32 10, %eltcopy
%4 = sext i32 %elt to i64
%5 = getelementptr i32, i32* %ptr, i64 %4
store i32 %3, i32* %5
%inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
br i1 %end, label %loop, label %ret
ret:
ret void
}
; Negative case: the two extracts read different elements (0 and 1) of the
; vector phi. The assertions expect no i32 phi to appear and the
; <16 x i32> phi to remain.
define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
; CHECK-LABEL: @nocopy
; CHECK-NOT: phi i32
; CHECK: phi <16 x i32> [ %3, %entry ], [ %inc, %loop ]
entry:
%tempvector = insertelement <16 x i64> poison, i64 %val, i32 0
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
%0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%1 = trunc <16 x i64> %0 to <16 x i32>
br label %loop
loop:
%2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
%elt = extractelement <16 x i32> %2, i32 0
%eltcopy = extractelement <16 x i32> %2, i32 1
%end = icmp ult i32 %elt, %limit
%3 = add i32 10, %eltcopy
%4 = sext i32 %elt to i64
%5 = getelementptr i32, i32* %ptr, i64 %4
store i32 %3, i32* %5
%inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
br i1 %end, label %loop, label %ret
ret:
ret void
}
; Element 0 of the loop-carried <3 x i32> phi %input_2.addr.0 is extracted
; after the loop, while the loop body updates the phi with a vector sdiv.
; The assertions expect an extract of element 0 directly from the %input_2
; argument, and no further element-0 extract from any <3 x i32> value after it.
define i1 @g(<3 x i32> %input_2) {
; CHECK-LABEL: @g
; CHECK: extractelement <3 x i32> %input_2, i32 0
entry:
br label %for.cond
for.cond:
%input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
%input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
br i1 undef, label %for.end, label %for.body
; CHECK-NOT: extractelement <3 x i32> %{{.*}}, i32 0
for.body:
%dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
%sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
%div45 = sdiv <3 x i32> %input_2.addr.0, %sub44
br label %for.cond
for.end:
%0 = extractelement <3 x i32> %input_2.addr.0, i32 0
%.89 = select i1 false, i32 0, i32 %0
%tobool313 = icmp eq i32 %.89, 0
ret i1 %tobool313
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,413 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Can't get smaller than this.
; Baseline: a lone vector trunc from <2 x i64> to <2 x i1> is expected to be
; left unchanged.
define <2 x i1> @trunc(<2 x i64> %a) {
; CHECK-LABEL: @trunc(
; CHECK-NEXT: [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[T]]
;
%t = trunc <2 x i64> %a to <2 x i1>
ret <2 x i1> %t
}
; This is trunc.
; Masking each lane with 1 and comparing the result against zero with 'ne'
; yields the low bit of the lane, so the expected output is a single trunc
; to <2 x i1>.
define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc(
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%t = and <2 x i64> %a, <i64 1, i64 1>
%r = icmp ne <2 x i64> %t, zeroinitializer
ret <2 x i1> %r
}
; This is trunc.
; Same mask-and-compare pattern, but lane 0 of the 'and' mask constant is
; undef. The expected output is still a single trunc to <2 x i1>.
define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%t = and <2 x i64> %a, <i64 undef, i64 1>
%r = icmp ne <2 x i64> %t, zeroinitializer
ret <2 x i1> %r
}
; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.
; Here both the 'and' mask and the compared-against constant have an undef in
; lane 0; the expected output is currently identical to the input.
define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(
; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%t = and <2 x i64> %a, <i64 undef, i64 1>
%r = icmp ne <2 x i64> %t, <i64 undef, i64 0>
ret <2 x i1> %r
}
; The ashr turns into an lshr.
; The input masks each lane with 65535 before shifting, so the sign bit is zero
; at the shift. The expected output shifts first (lshr by 1) and then masks
; with 32767.
define <2 x i64> @test2(<2 x i64> %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%b = and <2 x i64> %a, <i64 65535, i64 65535>
%t = ashr <2 x i64> %b, <i64 1, i64 1>
ret <2 x i64> %t
}
; 'and' of two sign-extended "fcmp ord X, 0.0" results. The expected output
; merges the two compares into a single "fcmp ord a, b" followed by one sext
; and the bitcast.
define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[CONV]]
;
%cmp = fcmp ord <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp ord <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%and = and <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %and to <2 x i64>
ret <2 x i64> %conv
}
; 'or' of two sign-extended "fcmp uno X, 0.0" results. The expected output
; merges the two compares into a single "fcmp uno a, b" followed by one sext
; and the bitcast.
define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[OR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[OR]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[CONV]]
;
%cmp = fcmp uno <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp uno <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%or = or <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %or to <2 x i64>
ret <2 x i64> %conv
}
; rdar://7434900
; 'and' of two sign-extended "fcmp ult X, 0.0" results. The expected output
; keeps both compares, performs the 'and' on the <4 x i1> values, and
; sign-extends only once.
define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[CONV]]
;
%cmp = fcmp ult <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp ult <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%and = and <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %and to <2 x i64>
ret <2 x i64> %conv
}
; 'or' of two sign-extended "fcmp ult X, 0.0" results (the value is named %and
; in the input but the operation is 'or'). The expected output performs the
; 'or' on the <4 x i1> compare results and sign-extends only once.
define <2 x i64> @test6(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT: [[AND1:%.*]] = or <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[CONV]]
;
%cmp = fcmp ult <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp ult <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%and = or <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %and to <2 x i64>
ret <2 x i64> %conv
}
; 'xor' of two sign-extended "fcmp ult X, 0.0" results (the value is named %and
; in the input but the operation is 'xor'). The expected output performs the
; 'xor' on the <4 x i1> compare results and sign-extends only once.
define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT: [[AND1:%.*]] = xor <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[CONV]]
;
%cmp = fcmp ult <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp ult <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%and = xor <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %and to <2 x i64>
ret <2 x i64> %conv
}
; Vector trunc followed by an add and a store. The expected output keeps the
; trunc and add as written; the only difference is that the store carries an
; explicit "align 8".
define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) {
; CHECK-LABEL: @convert(
; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
; CHECK-NEXT: store <2 x i32> [[ADD]], <2 x i32>* [[DST_ADDR:%.*]], align 8
; CHECK-NEXT: ret void
;
%val = trunc <2 x i64> %src to <2 x i32>
%add = add <2 x i32> %val, <i32 1, i32 1>
store <2 x i32> %add, <2 x i32>* %dst.addr
ret void
}
; trunc i64 -> i32 followed by zext i32 -> i65 (wider than the source).
; The expected output masks the low 32 bits in the source type and then
; zero-extends once to i65.
define <2 x i65> @foo(<2 x i64> %t) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65>
; CHECK-NEXT: ret <2 x i65> [[B]]
;
%a = trunc <2 x i64> %t to <2 x i32>
%b = zext <2 x i32> %a to <2 x i65>
ret <2 x i65> %b
}
; trunc i65 -> i32 followed by zext i32 -> i64 (narrower than the source).
; The expected output truncates once to i64 and then masks the low 32 bits.
define <2 x i64> @bar(<2 x i65> %t) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
; CHECK-NEXT: ret <2 x i64> [[B]]
;
%a = trunc <2 x i65> %t to <2 x i32>
%b = zext <2 x i32> %a to <2 x i64>
ret <2 x i64> %b
}
; trunc i65 -> i32 followed by sext i32 -> i64. The expected output is
; identical to the input (the trunc/sext pair is kept).
define <2 x i64> @bars(<2 x i65> %t) {
; CHECK-LABEL: @bars(
; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32>
; CHECK-NEXT: [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[B]]
;
%a = trunc <2 x i65> %t to <2 x i32>
%b = sext <2 x i32> %a to <2 x i64>
ret <2 x i64> %b
}
; trunc i64 -> i32 followed by sext i32 -> i64 (same width as the source).
; The expected output replaces the cast pair with shl by 32 and
; "ashr exact" by 32 in the source type.
define <2 x i64> @quxs(<2 x i64> %t) {
; CHECK-LABEL: @quxs(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
; CHECK-NEXT: ret <2 x i64> [[B]]
;
%a = trunc <2 x i64> %t to <2 x i32>
%b = sext <2 x i32> %a to <2 x i64>
ret <2 x i64> %b
}
; shl by 32 followed by ashr by 32, already in shift form. The expected output
; keeps both shifts and only adds the "exact" flag to the ashr.
define <2 x i64> @quxt(<2 x i64> %t) {
; CHECK-LABEL: @quxt(
; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
; CHECK-NEXT: ret <2 x i64> [[B]]
;
%a = shl <2 x i64> %t, <i64 32, i64 32>
%b = ashr <2 x i64> %a, <i64 32, i64 32>
ret <2 x i64> %b
}
; fptrunc double -> float followed by fpext float -> double. The expected
; output is identical to the input: the round trip is not removed.
define <2 x double> @fa(<2 x double> %t) {
; CHECK-LABEL: @fa(
; CHECK-NEXT: [[A:%.*]] = fptrunc <2 x double> [[T:%.*]] to <2 x float>
; CHECK-NEXT: [[B:%.*]] = fpext <2 x float> [[A]] to <2 x double>
; CHECK-NEXT: ret <2 x double> [[B]]
;
%a = fptrunc <2 x double> %t to <2 x float>
%b = fpext <2 x float> %a to <2 x double>
ret <2 x double> %b
}
; fptoui double -> i64 followed by uitofp i64 -> double. The expected output
; is identical to the input: the round trip is not removed.
define <2 x double> @fb(<2 x double> %t) {
; CHECK-LABEL: @fb(
; CHECK-NEXT: [[A:%.*]] = fptoui <2 x double> [[T:%.*]] to <2 x i64>
; CHECK-NEXT: [[B:%.*]] = uitofp <2 x i64> [[A]] to <2 x double>
; CHECK-NEXT: ret <2 x double> [[B]]
;
%a = fptoui <2 x double> %t to <2 x i64>
%b = uitofp <2 x i64> %a to <2 x double>
ret <2 x double> %b
}
; fptosi double -> i64 followed by sitofp i64 -> double. The expected output
; is identical to the input: the round trip is not removed.
define <2 x double> @fc(<2 x double> %t) {
; CHECK-LABEL: @fc(
; CHECK-NEXT: [[A:%.*]] = fptosi <2 x double> [[T:%.*]] to <2 x i64>
; CHECK-NEXT: [[B:%.*]] = sitofp <2 x i64> [[A]] to <2 x double>
; CHECK-NEXT: ret <2 x double> [[B]]
;
%a = fptosi <2 x double> %t to <2 x i64>
%b = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %b
}
; PR9228
; The function builds several vectors (including a "sizeof via GEP from null"
; pattern) but none of the computed values is used: it returns a literal undef.
; The expected output is just the return of undef, with all instructions removed.
define <4 x float> @f(i32 %a) {
; CHECK-LABEL: @f(
; CHECK-NEXT: ret <4 x float> undef
;
%dim = insertelement <4 x i32> poison, i32 %a, i32 0
%dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
%dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
%dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3
%offset_ptr = getelementptr <4 x float>, <4 x float>* null, i32 1
%offset_int = ptrtoint <4 x float>* %offset_ptr to i64
%sizeof32 = trunc i64 %offset_int to i32
%smearinsert33 = insertelement <4 x i32> poison, i32 %sizeof32, i32 0
%smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
%smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
%smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3
%delta_scale = mul <4 x i32> %dim32, %smearinsert36
%offset_delta = add <4 x i32> zeroinitializer, %delta_scale
%offset_varying_delta = add <4 x i32> %offset_delta, undef
ret <4 x float> undef
}
; 'or' of sign-extended "fcmp une n, 0.0" and "fcmp ueq n, 0.0". The two
; predicates are complementary, so one of them is true in every lane; the
; expected output is the all-ones (-1) constant vector.
define <8 x i32> @pr24458(<8 x float> %n) {
; CHECK-LABEL: @pr24458(
; CHECK-NEXT: ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
;
%notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
%equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
%notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
%equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
%wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i
ret <8 x i32> %wrong
}
; Hoist a trunc to a scalar if we're inserting into a placeholder vector.
; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
; In this copy of the test the input inserts into a poison vector; the
; expected output still inserts the truncated scalar into an undef vector.
define <3 x i16> @trunc_inselt_undef(i32 %x) {
; CHECK-LABEL: @trunc_inselt_undef(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
;
%vec = insertelement <3 x i32> poison, i32 %x, i32 1
%trunc = trunc <3 x i32> %vec to <3 x i16>
ret <3 x i16> %trunc
}
; Hoist an fptrunc to a scalar if we're inserting into an undef vector.
; fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
; The base vector here is written as an explicit all-undef constant and the
; insertion index is a variable; the expected output keeps the variable index.
define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
; CHECK-LABEL: @fptrunc_inselt_undef(
; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[X:%.*]] to float
; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 [[INDEX:%.*]]
; CHECK-NEXT: ret <2 x float> [[TRUNC]]
;
%vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
%trunc = fptrunc <2 x double> %vec to <2 x float>
ret <2 x float> %trunc
}
; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar int into a constant vector and truncate:
; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index
; Currently the trunc is not hoisted. The only change in the expected output is
; that lane 1 of the constant (-2), which the insert overwrites, becomes undef.
define <3 x i16> @trunc_inselt1(i32 %x) {
; CHECK-LABEL: @trunc_inselt1(
; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 undef, i32 65536>, i32 [[X:%.*]], i32 1
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
;
%vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
%trunc = trunc <3 x i32> %vec to <3 x i16>
ret <3 x i16> %trunc
}
; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar FP into a constant vector and FP truncate:
; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index
; Currently the fptrunc is not hoisted; with a variable insertion index the
; expected output is identical to the input.
define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
; CHECK-LABEL: @fptrunc_inselt1(
; CHECK-NEXT: [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double [[X:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TRUNC]]
;
%vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
%trunc = fptrunc <2 x double> %vec to <2 x float>
ret <2 x float> %trunc
}
; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar int constant into a vector and truncate:
; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index
; Currently not performed: the expected output is identical to the input
; (constant 1048576 inserted at a variable index, then a vector trunc).
define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
; CHECK-LABEL: @trunc_inselt2(
; CHECK-NEXT: [[VEC:%.*]] = insertelement <8 x i32> [[X:%.*]], i32 1048576, i32 [[INDEX:%.*]]
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TRUNC]]
;
%vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
%trunc = trunc <8 x i32> %vec to <8 x i16>
ret <8 x i16> %trunc
}
; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar FP constant into a vector and FP truncate:
; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index
; Currently not performed: the expected output is identical to the input
; (constant 4.0 inserted at lane 2, then a vector fptrunc).
define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
; CHECK-LABEL: @fptrunc_inselt2(
; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x double> [[X:%.*]], double 4.000000e+00, i32 2
; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
; CHECK-NEXT: ret <3 x float> [[TRUNC]]
;
%vec = insertelement <3 x double> %x, double 4.0, i32 2
%trunc = fptrunc <3 x double> %vec to <3 x float>
ret <3 x float> %trunc
}
; Converting to a wide type might reduce instruction count,
; but we cannot do that unless the backend can recover from
; the creation of a potentially illegal op (like a 64-bit vmul).
; PR40032 - https://bugs.llvm.org/show_bug.cgi?id=40032
; The expected output is identical to the input: both truncs, the narrow
; multiply and the final sext are all kept.
define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @sext_less_casting_with_wideop(
; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
; CHECK-NEXT: [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%xnarrow = trunc <2 x i64> %x to <2 x i32>
%ynarrow = trunc <2 x i64> %y to <2 x i32>
%mul = mul <2 x i32> %xnarrow, %ynarrow
%r = sext <2 x i32> %mul to <2 x i64>
ret <2 x i64> %r
}
; Zero-extend variant of the "narrow multiply, then widen" pattern: two truncs
; to i32, a narrow multiply, then a zext back to i64. The expected output is
; identical to the input, i.e. the multiply is not widened to i64.
define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @zext_less_casting_with_wideop(
; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
; CHECK-NEXT: [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%xnarrow = trunc <2 x i64> %x to <2 x i32>
%ynarrow = trunc <2 x i64> %y to <2 x i32>
%mul = mul <2 x i32> %xnarrow, %ynarrow
%r = zext <2 x i32> %mul to <2 x i64>
ret <2 x i64> %r
}

View File

@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Little-endian layout with 64-bit pointers (p:64:64:64), x86-64 Linux triple.
; The pointer width matters for tests whose expected output widens GEP indices.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Zero-initialized i8 global.
; NOTE(review): not referenced by any function visible in this part of the file.
@G1 = global i8 zeroinitializer
; Equality compare of two pointer vectors. Expected to pass through
; instcombine unchanged as a single vector icmp.
define <2 x i1> @test(<2 x i8*> %a, <2 x i8*> %b) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8*> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%c = icmp eq <2 x i8*> %a, %b
ret <2 x i1> %c
}
; A constant <2 x i32> is converted to pointers and compared 'ult' against a
; null pointer vector. Expected to fold to an all-false result.
; The %a argument is unused.
define <2 x i1> @test2(<2 x i8*> %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%c = inttoptr <2 x i32> <i32 1, i32 2> to <2 x i8*>
%d = icmp ult <2 x i8*> %c, zeroinitializer
ret <2 x i1> %d
}
; Vector GEP off %a (per-lane offsets 1 and 0) compared 'ult' against null
; pointers. Expected to fold to all-false, removing the GEP.
define <2 x i1> @test3(<2 x i8*> %a) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%g = getelementptr i8, <2 x i8*> %a, <2 x i32> <i32 1, i32 0>
%B = icmp ult <2 x i8*> %g, zeroinitializer
ret <2 x i1> %B
}
; One-element pointer vector version of the 'GEP ult null' pattern.
; Expected to fold to a <1 x i1> all-false result.
define <1 x i1> @test4(<1 x i8*> %a) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: ret <1 x i1> zeroinitializer
;
%g = getelementptr i8, <1 x i8*> %a, <1 x i32> <i32 1>
%B = icmp ult <1 x i8*> %g, zeroinitializer
ret <1 x i1> %B
}
; Three chained vector GEPs feed the 'ult null' compare. The whole chain is
; expected to disappear, leaving an all-false result.
define <2 x i1> @test5(<2 x i8*> %a) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%w = getelementptr i8, <2 x i8*> %a, <2 x i32> zeroinitializer
%e = getelementptr i8, <2 x i8*> %w, <2 x i32> <i32 5, i32 9>
%g = getelementptr i8, <2 x i8*> %e, <2 x i32> <i32 1, i32 0>
%B = icmp ult <2 x i8*> %g, zeroinitializer
ret <2 x i1> %B
}
; Vector GEP into a vector of { i32, i32 } pointers. The input uses <2 x i32>
; for the first index operand; the expected output has it rewritten as
; <2 x i64> (the datalayout above declares 64-bit pointers), while the struct
; field index stays <2 x i32> zeroinitializer.
define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x { i32, i32 }*> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32*> [[W]]
;
%w = getelementptr {i32, i32}, <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
ret <2 x i32*> %w
}
; Scalable-vector variant: a splat of i32 1 (insertelement into poison plus a
; zero-mask shufflevector) is converted to pointers and compared 'ult' against
; null. The expected result is a single constant expression (icmp ult of the
; zero-extended splat against zeroinitializer), not a literal all-false vector.
define <vscale x 2 x i1> @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: ret <vscale x 2 x i1> icmp ult (<vscale x 2 x i64> zext (<vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer) to <vscale x 2 x i64>), <vscale x 2 x i64> zeroinitializer)
;
%ins = insertelement <vscale x 2 x i32> poison, i32 1, i32 0
%b = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
%c = inttoptr <vscale x 2 x i32> %b to <vscale x 2 x i8*>
%d = icmp ult <vscale x 2 x i8*> %c, zeroinitializer
ret <vscale x 2 x i1> %d
}

View File

@ -0,0 +1,93 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; insertelements should fold to shuffle
; Two constant inserts (1.0 at lane 1, 2.0 at lane 2) are expected to become a
; single shufflevector of %x with a constant vector; mask <0, 5, 6, 3> takes
; lanes 1 and 2 from the constant operand and lanes 0 and 3 from %x.
define <4 x float> @foo(<4 x float> %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
; Insert of a constant is canonicalized ahead of insert of a variable.
; The input inserts %a at lane 1 first and the constant 2.0 at lane 2 second;
; the expected output performs the constant insert (lane 2) first.
define <4 x float> @bar(<4 x float> %x, float %a) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[X:%.*]], float 2.000000e+00, i32 2
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A:%.*]], i32 1
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float %a, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
; The second insert uses a variable index (%a), so the pair is not turned into
; a shuffle; the expected output is the two inserts unchanged.
define <4 x float> @baz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @baz(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i32 1
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 [[A:%.*]]
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 %a
ret <4 x float> %ins2
}
; insertelements should fold to shuffle
; Six-insert chain mixing constant and variable (%a) indices. In the expected
; output:
;   - %ins1 (lane 3) and %ins2 (variable index) are kept as inserts;
;   - %ins3 (3.0 at lane 2) no longer appears, lane 2 being rewritten by %ins5;
;   - %ins4 and %ins5 (lanes 1 and 2) are merged into one shufflevector;
;   - %ins6 (variable index) is kept as the final insert.
define <4 x float> @bazz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i32 3
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 5.000000e+00, i32 [[A:%.*]]
; CHECK-NEXT: [[INS5:%.*]] = shufflevector <4 x float> [[INS2]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[INS6:%.*]] = insertelement <4 x float> [[INS5]], float 7.000000e+00, i32 [[A]]
; CHECK-NEXT: ret <4 x float> [[INS6]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 3
%ins2 = insertelement<4 x float> %ins1, float 5.0, i32 %a
%ins3 = insertelement<4 x float> %ins2, float 3.0, i32 2
%ins4 = insertelement<4 x float> %ins3, float 1.0, i32 1
%ins5 = insertelement<4 x float> %ins4, float 2.0, i32 2
%ins6 = insertelement<4 x float> %ins5, float 7.0, i32 %a
ret <4 x float> %ins6
}
; Out of bounds index folds to undef
; The first insert uses index 5 on a 4-element vector. The expected result is
; a constant with undef in every lane except lane 2, which holds the 2.0 from
; the second (in-range) insert; %x does not appear in the output.
define <4 x float> @bazzz(<4 x float> %x) {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: ret <4 x float> <float undef, float undef, float 2.000000e+00, float undef>
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 5
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
; The first insert uses an undef index. The expected result is a constant with
; undef in every lane except lane 2 (2.0 from the second insert).
define <4 x float> @bazzzz(<4 x float> %x) {
; CHECK-LABEL: @bazzzz(
; CHECK-NEXT: ret <4 x float> <float undef, float undef, float 2.000000e+00, float undef>
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 undef
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
; Every operand is a constant (the innermost insertelement is a constant
; expression), so the chain is expected to fold to a literal vector:
; lane 0 = 1.0 (untouched), lane 1 = 5.0, lane 2 = 10.0, lane 3 = 4.0.
define <4 x float> @bazzzzz() {
; CHECK-LABEL: @bazzzzz(
; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
;
%ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
ret <4 x float> %ins2
}
; Constant-expression chain: a shufflevector of poison and <1.0, 2.0, 3.0, 4.0>
; with mask <0, 5, undef, 6>, followed by inserts of 4.0 at lane 3 and 5.0 at
; lane 1. The expected constant keeps poison in lane 0 (selected from the
; poison operand) and undef in lane 2 (undef mask element).
; Both arguments (%x, %a) are unused.
define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @bazzzzzz(
; CHECK-NEXT: ret <4 x float> <float poison, float 5.000000e+00, float undef, float 4.000000e+00>
;
%ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> poison, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
ret <4 x float> %ins1
}

View File

@ -0,0 +1,185 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Extract at index 1, which is below the minimum element count (4) of
; <vscale x 4 x i32>. Expected to be left unchanged.
define i32 @extractelement_in_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_in_range(
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 1
; CHECK-NEXT: ret i32 [[R]]
;
%r = extractelement <vscale x 4 x i32> %a, i64 1
ret i32 %r
}
; Extract at index 4, which equals the minimum element count of
; <vscale x 4 x i32>; whether it is in range depends on the runtime vscale.
; Expected to be left unchanged (not folded to undef/poison).
define i32 @extractelement_maybe_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_maybe_out_of_range(
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 4
; CHECK-NEXT: ret i32 [[R]]
;
%r = extractelement <vscale x 4 x i32> %a, i64 4
ret i32 %r
}
; %f is inserted at lane 0 of a poison scalable vector, the vector is bitcast
; from float to i32 lanes, and lane 0 is extracted. Expected to collapse to a
; single scalar bitcast of %f to i32.
define i32 @extractelement_bitcast(float %f) {
; CHECK-LABEL: @extractelement_bitcast(
; CHECK-NEXT: [[R:%.*]] = bitcast float [[F:%.*]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%vec_float = insertelement <vscale x 4 x float> poison, float %f, i32 0
%vec_int = bitcast <vscale x 4 x float> %vec_float to <vscale x 4 x i32>
%r = extractelement <vscale x 4 x i32> %vec_int, i32 0
ret i32 %r
}
; %x is inserted at i32 lane 1, the vector is bitcast to i8 lanes (four per
; i32), and i8 element 4 is extracted. Element 4 lies inside i32 lane 1, the
; lane just written, so the expected output is simply a trunc of %x to i8.
define i8 @extractelement_bitcast_to_trunc(<vscale x 2 x i32> %a, i32 %x) {
; CHECK-LABEL: @extractelement_bitcast_to_trunc(
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[X:%.*]] to i8
; CHECK-NEXT: ret i8 [[R]]
;
%vec = insertelement <vscale x 2 x i32> %a, i32 %x, i32 1
%vec_cast = bitcast <vscale x 2 x i32> %vec to <vscale x 8 x i8>
%r = extractelement <vscale x 8 x i8> %vec_cast, i32 4
ret i8 %r
}
; TODO: Instcombine could remove the insert.
; The extracted i8 element 2 lies inside i32 lane 0, whereas the insert writes
; lane 1, so the extracted byte does not come from %x. The expected output
; currently keeps all three instructions.
define i8 @extractelement_bitcast_wrong_insert(<vscale x 2 x i32> %a, i32 %x) {
; CHECK-LABEL: @extractelement_bitcast_wrong_insert(
; CHECK-NEXT: [[VEC:%.*]] = insertelement <vscale x 2 x i32> [[A:%.*]], i32 [[X:%.*]], i32 1
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <vscale x 2 x i32> [[VEC]] to <vscale x 8 x i8>
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 8 x i8> [[VEC_CAST]], i32 2
; CHECK-NEXT: ret i8 [[R]]
;
%vec = insertelement <vscale x 2 x i32> %a, i32 %x, i32 1 ; <- This insert could be removed.
%vec_cast = bitcast <vscale x 2 x i32> %vec to <vscale x 8 x i8>
%r = extractelement <vscale x 8 x i8> %vec_cast, i32 2
ret i8 %r
}
; TODO: Instcombine could optimize to return %v.
; %v is splatted across a scalable vector (insert at lane 0 plus a zero-mask
; shuffle) and lane 1 is extracted. The expected output currently keeps the
; insert, shuffle and extract unchanged.
define i32 @extractelement_shuffle_in_range(i32 %v) {
; CHECK-LABEL: @extractelement_shuffle_in_range(
; CHECK-NEXT: [[IN:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 1
; CHECK-NEXT: ret i32 [[R]]
;
%in = insertelement <vscale x 4 x i32> poison, i32 %v, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%r = extractelement <vscale x 4 x i32> %splat, i32 1
ret i32 %r
}
; Extract from a scalable splat at index 4, i.e. at the minimum element count
; of <vscale x 4 x i32>. Expected to be left unchanged.
define i32 @extractelement_shuffle_maybe_out_of_range(i32 %v) {
; CHECK-LABEL: @extractelement_shuffle_maybe_out_of_range(
; CHECK-NEXT: [[IN:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 4
; CHECK-NEXT: ret i32 [[R]]
;
%in = insertelement <vscale x 4 x i32> poison, i32 %v, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%r = extractelement <vscale x 4 x i32> %splat, i32 4
ret i32 %r
}
; Extract from a scalable splat with the constant index -1. Expected to be
; left unchanged: insert, shuffle and extract all remain in the output.
define i32 @extractelement_shuffle_invalid_index(i32 %v) {
; CHECK-LABEL: @extractelement_shuffle_invalid_index(
; CHECK-NEXT: [[IN:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
;
%in = insertelement <vscale x 4 x i32> poison, i32 %v, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%r = extractelement <vscale x 4 x i32> %splat, i32 -1
ret i32 %r
}
; Extract from a scalable splat with a runtime index (%idx). Expected to be
; left unchanged.
define i32 @extractelement_shuffle_symbolic_index(i32 %v, i32 %idx) {
; CHECK-LABEL: @extractelement_shuffle_symbolic_index(
; CHECK-NEXT: [[IN:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
%in = insertelement <vscale x 4 x i32> poison, i32 %v, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%r = extractelement <vscale x 4 x i32> %splat, i32 %idx
ret i32 %r
}
; Lanes 0..3 of %vec are extracted and re-inserted into %vec at the same
; positions, which is a no-op. Expected to simplify to returning %vec.
define <vscale x 4 x i32> @extractelement_insertelement_same_positions(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @extractelement_insertelement_same_positions(
; CHECK-NEXT: ret <vscale x 4 x i32> [[VEC:%.*]]
;
%vec.e0 = extractelement <vscale x 4 x i32> %vec, i32 0
%vec.e1 = extractelement <vscale x 4 x i32> %vec, i32 1
%vec.e2 = extractelement <vscale x 4 x i32> %vec, i32 2
%vec.e3 = extractelement <vscale x 4 x i32> %vec, i32 3
%1 = insertelement <vscale x 4 x i32> %vec, i32 %vec.e0, i32 0
%2 = insertelement <vscale x 4 x i32> %1, i32 %vec.e1, i32 1
%3 = insertelement <vscale x 4 x i32> %2, i32 %vec.e2, i32 2
%4 = insertelement <vscale x 4 x i32> %3, i32 %vec.e3, i32 3
ret <vscale x 4 x i32> %4
}
; Lanes 4..7 of %vec are extracted and inserted at lanes 0..3, so source and
; destination positions differ. Expected to be left unchanged: all four
; extracts and all four inserts remain in the output.
define <vscale x 4 x i32> @extractelement_insertelement_diff_positions(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @extractelement_insertelement_diff_positions(
; CHECK-NEXT: [[VEC_E0:%.*]] = extractelement <vscale x 4 x i32> [[VEC:%.*]], i32 4
; CHECK-NEXT: [[VEC_E1:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 5
; CHECK-NEXT: [[VEC_E2:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 6
; CHECK-NEXT: [[VEC_E3:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 7
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 4 x i32> [[VEC]], i32 [[VEC_E0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[VEC_E1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <vscale x 4 x i32> [[TMP2]], i32 [[VEC_E2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <vscale x 4 x i32> [[TMP3]], i32 [[VEC_E3]], i32 3
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
;
%vec.e0 = extractelement <vscale x 4 x i32> %vec, i32 4
%vec.e1 = extractelement <vscale x 4 x i32> %vec, i32 5
%vec.e2 = extractelement <vscale x 4 x i32> %vec, i32 6
%vec.e3 = extractelement <vscale x 4 x i32> %vec, i32 7
%1 = insertelement <vscale x 4 x i32> %vec, i32 %vec.e0, i32 0
%2 = insertelement <vscale x 4 x i32> %1, i32 %vec.e1, i32 1
%3 = insertelement <vscale x 4 x i32> %2, i32 %vec.e2, i32 2
%4 = insertelement <vscale x 4 x i32> %3, i32 %vec.e3, i32 3
ret <vscale x 4 x i32> %4
}
; Scalar bitcast of an extracted float lane. The expected output swaps the
; order: the whole vector is bitcast to i32 lanes first, then lane 0 is
; extracted.
define i32 @bitcast_of_extractelement( <vscale x 2 x float> %d) {
; CHECK-LABEL: @bitcast_of_extractelement(
; CHECK-NEXT: [[BC:%.*]] = bitcast <vscale x 2 x float> [[D:%.*]] to <vscale x 2 x i32>
; CHECK-NEXT: [[CAST:%.*]] = extractelement <vscale x 2 x i32> [[BC]], i32 0
; CHECK-NEXT: ret i32 [[CAST]]
;
%ext = extractelement <vscale x 2 x float> %d, i32 0
%cast = bitcast float %ext to i32
ret i32 %cast
}
; Lane 0 of a scalable vector is extracted and compared against 0. Expected to
; be left unchanged. The %b and %z arguments are unused.
define i1 @extractelement_is_zero(<vscale x 2 x i32> %d, i1 %b, i32 %z) {
; CHECK-LABEL: @extractelement_is_zero(
; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 2 x i32> [[D:%.*]], i32 0
; CHECK-NEXT: [[BB:%.*]] = icmp eq i32 [[EXT]], 0
; CHECK-NEXT: ret i1 [[BB]]
;
%ext = extractelement <vscale x 2 x i32> %d, i32 0
%bb = icmp eq i32 %ext, 0
ret i1 %bb
}
; OSS-Fuzz #25272
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=25272
; Fuzzer-derived input: insert at lane 0, vector bitcast, then extract at the
; very large constant index 2147483647. The expected output is the input
; unchanged, i.e. the insert/bitcast/extract fold is not applied here.
define i32 @ossfuzz_25272(float %f) {
; CHECK-LABEL: @ossfuzz_25272(
; CHECK-NEXT: [[VEC_FLOAT:%.*]] = insertelement <vscale x 4 x float> poison, float [[F:%.*]], i32 0
; CHECK-NEXT: [[VEC_INT:%.*]] = bitcast <vscale x 4 x float> [[VEC_FLOAT]] to <vscale x 4 x i32>
; CHECK-NEXT: [[E:%.*]] = extractelement <vscale x 4 x i32> [[VEC_INT]], i32 2147483647
; CHECK-NEXT: ret i32 [[E]]
;
%vec_float = insertelement <vscale x 4 x float> poison, float %f, i32 0
%vec_int = bitcast <vscale x 4 x float> %vec_float to <vscale x 4 x i32>
%E = extractelement <vscale x 4 x i32> %vec_int, i32 2147483647
ret i32 %E
}

View File

@ -0,0 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
; This test checks that bitcast is moved after insertelement when both vector and scalar are
; bitcast from the same element type.
; inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp
; --> bitcast (inselt VecSrc, ScalarSrc, IdxOp)
; Here both sources are i32-based (scalar i32, vector <vscale x 4 x i32>); the
; expected output inserts %x into the i32 vector and bitcasts the result once.
define <vscale x 4 x float> @insertelement_bitcast(<vscale x 4 x i32> %vec, i32 %x) {
; CHECK-LABEL: @insertelement_bitcast(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 4 x i32> [[VEC:%.*]], i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = bitcast <vscale x 4 x i32> [[TMP1]] to <vscale x 4 x float>
; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
;
%x_cast = bitcast i32 %x to float
%vec_cast = bitcast <vscale x 4 x i32> %vec to <vscale x 4 x float>
%r = insertelement <vscale x 4 x float> %vec_cast, float %x_cast, i32 0
ret <vscale x 4 x float> %r
}
; This test checks that the code path "Try to form a shuffle from a chain of extract-insert ops" is
; not taken when both the extract and the insert operate on scalable types.
; For a scalable type, the vector length needed to create the shuffle mask is not a compile-time constant.
; Meanwhile, for scalable types the shuffle mask supports only splat and undef in the current code base.
; Otherwise we crash at:
; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed."
; Expected output: the extract/insert pair is left unchanged.
define <vscale x 4 x i32> @insertelement_extractelement(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @insertelement_extractelement(
; CHECK-NEXT: [[T0:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT: [[T1:%.*]] = insertelement <vscale x 4 x i32> [[B:%.*]], i32 [[T0]], i32 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[T1]]
;
%t0 = extractelement <vscale x 4 x i32> %a, i32 1
%t1 = insertelement <vscale x 4 x i32> %b, i32 %t0, i32 0
ret <vscale x 4 x i32> %t1
}
; This test checks that we are not attempting to create a shuffle from extract/insert chain,
; when extract is from a scalable type, and the insert vector is fixed-length.
; Expected output: the extract from <vscale x 4 x i32> and the insert into
; <4 x i32> are both left unchanged.
define <4 x i32> @insertelement_extractelement_fixed_vec_extract_from_scalable(<vscale x 4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @insertelement_extractelement_fixed_vec_extract_from_scalable(
; CHECK-NEXT: [[T0:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT: [[T1:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[T0]], i32 0
; CHECK-NEXT: ret <4 x i32> [[T1]]
;
%t0 = extractelement <vscale x 4 x i32> %a, i32 1
%t1 = insertelement <4 x i32> %b, i32 %t0, i32 0
ret <4 x i32> %t1
}
; This test checks that the optimization "foldConstantInsEltIntoShuffle" is not taken for scalable types.
; Particularly the fold:
; insertelt (insertelt X, C1, CIndex1), C, CIndex
; --> shufflevector X, CVec', Mask'
; For a scalable type, the vector length needed to create the shuffle mask is not a compile-time constant.
; Meanwhile, for scalable types the shuffle mask supports only splat and undef in the current code base.
; Otherwise we crash at:
; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed."
; Expected output: both constant inserts (lanes 1 and 2) are left unchanged.
define <vscale x 4 x i32> @insertelement_insertelement(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @insertelement_insertelement(
; CHECK-NEXT: [[T0:%.*]] = insertelement <vscale x 4 x i32> [[VEC:%.*]], i32 1, i32 1
; CHECK-NEXT: [[T1:%.*]] = insertelement <vscale x 4 x i32> [[T0]], i32 2, i32 2
; CHECK-NEXT: ret <vscale x 4 x i32> [[T1]]
;
%t0 = insertelement <vscale x 4 x i32> %vec, i32 1, i32 1
%t1 = insertelement <vscale x 4 x i32> %t0, i32 2, i32 2
ret <vscale x 4 x i32> %t1
}
; This test checks that the following insertelement sequence is not folded into a shuffle splat.
; The length of a scalable vector is unknown at compile time. Therefore the following insertelements
; may not form a valid splat.
; %x is written to lanes 0..3 only (the minimum element count); the expected
; output keeps all four inserts.
define <vscale x 4 x float> @insertelement_sequene_may_not_be_splat(float %x) {
; CHECK-LABEL: @insertelement_sequene_may_not_be_splat(
; CHECK-NEXT: [[T0:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = insertelement <vscale x 4 x float> [[T0]], float [[X]], i32 1
; CHECK-NEXT: [[T2:%.*]] = insertelement <vscale x 4 x float> [[T1]], float [[X]], i32 2
; CHECK-NEXT: [[T3:%.*]] = insertelement <vscale x 4 x float> [[T2]], float [[X]], i32 3
; CHECK-NEXT: ret <vscale x 4 x float> [[T3]]
;
%t0 = insertelement <vscale x 4 x float> poison, float %x, i32 0
%t1 = insertelement <vscale x 4 x float> %t0, float %x, i32 1
%t2 = insertelement <vscale x 4 x float> %t1, float %x, i32 2
%t3 = insertelement <vscale x 4 x float> %t2, float %x, i32 3
ret <vscale x 4 x float> %t3
}
; OSS-Fuzz #27416
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=27416
; Fuzzer-derived input: an undef value is inserted into a scalable splat using
; an i8 index of -128, and the result is stored through an undef pointer.
; The expected output is the input unchanged.
define void @ossfuzz_27416(i32 %v) {
; CHECK-LABEL: @ossfuzz_27416(
; CHECK-NEXT: [[IN:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i32 0
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[I1:%.*]] = insertelement <vscale x 4 x i32> [[SPLAT]], i32 undef, i8 -128
; CHECK-NEXT: store <vscale x 4 x i32> [[I1]], <vscale x 4 x i32>* undef, align 16
; CHECK-NEXT: ret void
;
%in = insertelement <vscale x 4 x i32> poison, i32 %v, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%I1 = insertelement <vscale x 4 x i32> %splat, i32 undef, i8 -128
store <vscale x 4 x i32> %I1, <vscale x 4 x i32>* undef, align 16
ret void
}

View File

@ -0,0 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
; Round trip of a constant: i32 -> float bitcast, insert into lane 0 of a
; poison <1 x float>, extract lane 0, bitcast back to i32. Expected to fold to
; the original constant 2139171423.
define i32 @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret i32 2139171423
;
%A = bitcast i32 2139171423 to float
%B = insertelement <1 x float> poison, float %A, i32 0
%C = extractelement <1 x float> %B, i32 0
%D = bitcast float %C to i32
ret i32 %D
}
; Four constant inserts fill every lane of a poison <4 x i64>. Expected to
; fold to the literal vector <-1, -2, -3, -4>.
define <4 x i64> @insertelement() {
; CHECK-LABEL: @insertelement(
; CHECK-NEXT: ret <4 x i64> <i64 -1, i64 -2, i64 -3, i64 -4>
;
%vec1 = insertelement <4 x i64> poison, i64 -1, i32 0
%vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
%vec3 = insertelement <4 x i64> %vec2, i64 -3, i32 2
%vec4 = insertelement <4 x i64> %vec3, i64 -4, i32 3
ret <4 x i64> %vec4
}
; The returned value comes from an insert at index 4, which is out of range
; for a 4-element vector; the expected result is poison (despite "undef" in
; the function name).
; NOTE(review): %vec5 is built on %vec3, so %vec4 is unused; confirm whether
; that is intentional.
define <4 x i64> @insertelement_undef() {
; CHECK-LABEL: @insertelement_undef(
; CHECK-NEXT: ret <4 x i64> poison
;
%vec1 = insertelement <4 x i64> poison, i64 -1, i32 0
%vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
%vec3 = insertelement <4 x i64> %vec2, i64 -3, i32 2
%vec4 = insertelement <4 x i64> %vec3, i64 -4, i32 3
%vec5 = insertelement <4 x i64> %vec3, i64 -5, i32 4
ret <4 x i64> %vec5
}
; Extract with an undef index from an all-zero constant vector. Expected to
; fold to poison rather than to 0.
define i64 @extract_undef_index_from_zero_vec() {
; CHECK-LABEL: @extract_undef_index_from_zero_vec(
; CHECK-NEXT: ret i64 poison
;
%E = extractelement <2 x i64> zeroinitializer, i64 undef
ret i64 %E
}
; Extract with an undef index from a constant vector whose lanes are all -1.
; Expected to fold to poison rather than to -1.
define i64 @extract_undef_index_from_nonzero_vec() {
; CHECK-LABEL: @extract_undef_index_from_nonzero_vec(
; CHECK-NEXT: ret i64 poison
;
%E = extractelement <2 x i64> <i64 -1, i64 -1>, i64 undef
ret i64 %E
}

View File

@ -0,0 +1,301 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S -verify | FileCheck %s
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; fneg of an undef scalable vector is expected to simplify to undef.
; The %val argument is unused.
define <vscale x 2 x double> @fneg(<vscale x 2 x double> %val) {
; CHECK-LABEL: @fneg(
; CHECK-NEXT: ret <vscale x 2 x double> undef
;
%r = fneg <vscale x 2 x double> undef
ret <vscale x 2 x double> %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Binary Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; add of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i32> @add() {
; CHECK-LABEL: @add(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = add <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; fadd of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x float> @fadd() {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: ret <vscale x 4 x float> undef
;
%r = fadd <vscale x 4 x float> undef, undef
ret <vscale x 4 x float> %r
}
; sub of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i32> @sub() {
; CHECK-LABEL: @sub(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = sub <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; 0 - splat(16) on a scalable vector. Expected to constant-fold to a splat of
; -16, still expressed as an insertelement/shufflevector constant expression.
define <vscale x 4 x i32> @sub_splat() {
; CHECK-LABEL: @sub_splat(
; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
;
%r = sub <vscale x 4 x i32> zeroinitializer, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
ret <vscale x 4 x i32> %r
}
; fsub of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x float> @fsub() {
; CHECK-LABEL: @fsub(
; CHECK-NEXT: ret <vscale x 4 x float> undef
;
%r = fsub <vscale x 4 x float> undef, undef
ret <vscale x 4 x float> %r
}
; mul of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i32> @mul() {
; CHECK-LABEL: @mul(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = mul <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; fmul of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x float> @fmul() {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: ret <vscale x 4 x float> undef
;
%r = fmul <vscale x 4 x float> undef, undef
ret <vscale x 4 x float> %r
}
; udiv of two undef scalable vectors is expected to simplify to poison
; (not undef, unlike add/sub/mul).
define <vscale x 4 x i32> @udiv() {
; CHECK-LABEL: @udiv(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = udiv <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; Unsigned division of an all-zero scalable vector by an all-zero scalable
; vector (division by zero in every lane). Expected to simplify to poison.
define <vscale x 4 x i32> @udiv_splat_zero() {
; CHECK-LABEL: @udiv_splat_zero(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = udiv <vscale x 4 x i32> zeroinitializer, zeroinitializer
ret <vscale x 4 x i32> %r
}
; sdiv of two undef scalable vectors is expected to simplify to poison.
define <vscale x 4 x i32> @sdiv() {
; CHECK-LABEL: @sdiv(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = sdiv <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; fdiv of two undef scalable vectors is expected to simplify to undef
; (the integer divisions in this file give poison instead).
define <vscale x 4 x float> @fdiv() {
; CHECK-LABEL: @fdiv(
; CHECK-NEXT: ret <vscale x 4 x float> undef
;
%r = fdiv <vscale x 4 x float> undef, undef
ret <vscale x 4 x float> %r
}
; urem of two undef scalable vectors is expected to simplify to poison.
define <vscale x 4 x i32> @urem() {
; CHECK-LABEL: @urem(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = urem <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; srem of two undef scalable vectors is expected to simplify to poison.
define <vscale x 4 x i32> @srem() {
; CHECK-LABEL: @srem(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = srem <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; frem of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x float> @frem() {
; CHECK-LABEL: @frem(
; CHECK-NEXT: ret <vscale x 4 x float> undef
;
%r = frem <vscale x 4 x float> undef, undef
ret <vscale x 4 x float> %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise Binary Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; shl with undef value and undef shift amount is expected to simplify to
; poison.
define <vscale x 4 x i32> @shl() {
; CHECK-LABEL: @shl(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = shl <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; lshr with undef value and undef shift amount is expected to simplify to
; poison.
define <vscale x 4 x i32> @lshr() {
; CHECK-LABEL: @lshr(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = lshr <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; ashr with undef value and undef shift amount is expected to simplify to
; poison.
define <vscale x 4 x i32> @ashr() {
; CHECK-LABEL: @ashr(
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
%r = ashr <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; and of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i32> @and() {
; CHECK-LABEL: @and(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = and <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; or of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i32> @or() {
; CHECK-LABEL: @or(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = or <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
; xor of two undef scalable vectors is expected to simplify to an all-zero
; vector (zeroinitializer), not to undef as for and/or.
define <vscale x 4 x i32> @xor() {
; CHECK-LABEL: @xor(
; CHECK-NEXT: ret <vscale x 4 x i32> zeroinitializer
;
%r = xor <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i32> %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Insert of constant 1 at lane 0 of a poison scalable vector. Expected to
; become the equivalent insertelement constant expression.
define <vscale x 4 x i32> @insertelement() {
; CHECK-LABEL: @insertelement(
; CHECK-NEXT: ret <vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0)
;
%i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
ret <vscale x 4 x i32> %i
}
; Scalable splat of constant 1 (insert at lane 0 plus zero-mask shuffle).
; Expected to become a single shufflevector-of-insertelement constant
; expression.
define <vscale x 4 x i32> @shufflevector() {
; CHECK-LABEL: @shufflevector(
; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
;
%i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
%i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %i2
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Memory Access and Addressing Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Load of a scalable vector through a constant GEP (index 1 off a null
; pointer). The load is expected to be kept; the expected output spells the
; alignment out as "align 16", which the input leaves implicit.
define <vscale x 2 x double> @load() {
; CHECK-LABEL: @load(
; CHECK-NEXT: [[R:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* getelementptr (<vscale x 2 x double>, <vscale x 2 x double>* null, i64 1), align 16
; CHECK-NEXT: ret <vscale x 2 x double> [[R]]
;
%r = load <vscale x 2 x double>, <vscale x 2 x double>* getelementptr (<vscale x 2 x double>, <vscale x 2 x double>* null, i64 1)
ret <vscale x 2 x double> %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversion Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Scalable splat of i32 1 bitcast to float lanes. Expected to become one
; nested constant expression: bitcast (shufflevector (insertelement ...)).
define <vscale x 4 x float> @bitcast() {
; CHECK-LABEL: @bitcast(
; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>)
;
%i1 = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
%i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%i3 = bitcast <vscale x 4 x i32> %i2 to <vscale x 4 x float>
ret <vscale x 4 x float> %i3
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Other Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; select with an undef condition between zeroinitializer and undef.
; Expected to simplify to undef.
define <vscale x 4 x i32> @select() {
; CHECK-LABEL: @select(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = select <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> undef
ret <vscale x 4 x i32> %r
}
; External declaration used only as a call target by @call below; no body is
; provided in this file.
declare <vscale x 16 x i8> @llvm.something(<vscale x 16 x i8>, <vscale x 16 x i8>)
; Call taking and returning scalable vectors, with undef arguments. Expected
; to be left in place unchanged.
define <vscale x 16 x i8> @call() {
; CHECK-LABEL: @call(
; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.something(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
;
%r = call <vscale x 16 x i8> @llvm.something(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
ret <vscale x 16 x i8> %r
}
; icmp eq of two undef scalable vectors is expected to simplify to undef.
define <vscale x 4 x i1> @icmp_undef() {
; CHECK-LABEL: @icmp_undef(
; CHECK-NEXT: ret <vscale x 4 x i1> undef
;
%r = icmp eq <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i1> %r
}
; icmp eq of two all-zero scalable vectors. Expected to fold to an all-true
; result, expressed as a splat-of-true constant expression
; (insertelement + shufflevector).
define <vscale x 4 x i1> @icmp_zero() {
; CHECK-LABEL: @icmp_zero(
; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer)
;
%r = icmp eq <vscale x 4 x i32> zeroinitializer, zeroinitializer
ret <vscale x 4 x i1> %r
}
; fcmp with the always-true predicate on undef operands. Expected to fold to a
; splat-of-true constant expression.
define <vscale x 4 x i1> @fcmp_true() {
; CHECK-LABEL: @fcmp_true(
; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer)
;
%r = fcmp true <vscale x 4 x float> undef, undef
ret <vscale x 4 x i1> %r
}
; fcmp with the always-false predicate on undef operands. Expected to fold to
; an all-false vector (zeroinitializer).
define <vscale x 4 x i1> @fcmp_false() {
; CHECK-LABEL: @fcmp_false(
; CHECK-NEXT: ret <vscale x 4 x i1> zeroinitializer
;
%r = fcmp false <vscale x 4 x float> undef, undef
ret <vscale x 4 x i1> %r
}
; Despite the "fcmp" in the name, the body is an integer compare: icmp ne of
; two undef <vscale x 4 x i32> vectors. Expected to simplify to undef.
define <vscale x 4 x i1> @fcmp_undef() {
; CHECK-LABEL: @fcmp_undef(
; CHECK-NEXT: ret <vscale x 4 x i1> undef
;
%r = icmp ne <vscale x 4 x i32> undef, undef
ret <vscale x 4 x i1> %r
}
; Despite the "fcmp" in the name, the body is an integer compare: icmp ule of
; an undef vector against zeroinitializer (a non-equality predicate).
; Expected to fold to a splat-of-true constant expression.
define <vscale x 4 x i1> @fcmp_not_equality() {
; CHECK-LABEL: @fcmp_not_equality(
; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer)
;
%r = icmp ule <vscale x 4 x i32> undef, zeroinitializer
ret <vscale x 4 x i1> %r
}

View File

@ -0,0 +1,39 @@
; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
; This test checks that SimplifyInstruction does not blow up in the face of
; a scalable shufflevector. vscale is a constant value known only at runtime.
; Therefore, it is not possible to know the concrete value of, or the length
; of, the mask at compile time. Simplifications that depend on the value
; of the mask cannot be performed.
; Given the fact that the value of the mask is unknown at compile time for
; scalable vectors, very few simplifications will be done. Here, we want to
; see that the instruction can be passed to SimplifyInstruction without
; crashing the compiler. The expected output below is simply what the pass
; happens to produce for this input.
; Scalable case: i1 true is inserted into lane 0 of a poison vector and then
; broadcast with an all-zero shuffle mask. The expected output returns the
; same splat written as a single constant expression.
; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version()
; CHECK-NEXT: ret <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer)
define <vscale x 8 x i1> @vscale_version() {
%splatter = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%foo = shufflevector <vscale x 8 x i1> %splatter,
<vscale x 8 x i1> undef,
<vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x i1> %foo
}
; The non-scalable version should be optimized as normal: the same
; insertelement + zero-mask shufflevector pattern on <8 x i1> is expected to
; fold to a literal vector of eight i1 true values.
; CHECK-LABEL: define <8 x i1> @fixed_length_version() {
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
define <8 x i1> @fixed_length_version() {
%splatter = insertelement <8 x i1> poison, i1 true, i32 0
%foo = shufflevector <8 x i1> %splatter,
<8 x i1> undef,
<8 x i32> zeroinitializer
ret <8 x i1> %foo
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,199 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S -verify | FileCheck %s
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insertelement
; insertelement with an undef index. The assertions expect the whole result
; to fold to an undef vector.
define <vscale x 4 x i32> @insertelement_idx_undef(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insertelement_idx_undef(
; CHECK-NEXT: ret <vscale x 4 x i32> undef
;
%r = insertelement <vscale x 4 x i32> %a, i32 5, i64 undef
ret <vscale x 4 x i32> %r
}
; insertelement of an undef scalar at constant index 0. The assertions expect
; the instruction to be left in place (no fold to %a).
define <vscale x 4 x i32> @insertelement_value_undef(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insertelement_value_undef(
; CHECK-NEXT: [[R:%.*]] = insertelement <vscale x 4 x i32> [[A:%.*]], i32 undef, i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = insertelement <vscale x 4 x i32> %a, i32 undef, i64 0
ret <vscale x 4 x i32> %r
}
; insertelement at index 4 of a <vscale x 4 x i32>. The vector has
; 4 * vscale lanes, so index 4 is in range only when vscale is at least 2;
; the assertions expect the instruction to be kept as-is.
define <vscale x 4 x i32> @insertelement_idx_maybe_out_of_bound(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insertelement_idx_maybe_out_of_bound(
; CHECK-NEXT: [[R:%.*]] = insertelement <vscale x 4 x i32> [[A:%.*]], i32 5, i64 4
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = insertelement <vscale x 4 x i32> %a, i32 5, i64 4
ret <vscale x 4 x i32> %r
}
; insertelement at a large constant index (12345). The assertions expect the
; instruction to be left untouched.
define <vscale x 4 x i32> @insertelement_idx_large_bound(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insertelement_idx_large_bound(
; CHECK-NEXT: [[R:%.*]] = insertelement <vscale x 4 x i32> [[A:%.*]], i32 5, i64 12345
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = insertelement <vscale x 4 x i32> %a, i32 5, i64 12345
ret <vscale x 4 x i32> %r
}
; Extracts lane 1 of %a and inserts it back into %a at the same lane. The
; assertions expect the pair to fold away so that %a is returned directly.
define <vscale x 4 x i32> @insert_extract_element_same_vec_idx_1(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insert_extract_element_same_vec_idx_1(
; CHECK-NEXT: ret <vscale x 4 x i32> [[A:%.*]]
;
%v = extractelement <vscale x 4 x i32> %a, i64 1
%r = insertelement <vscale x 4 x i32> %a, i32 %v, i64 1
ret <vscale x 4 x i32> %r
}
; insertelement of a constant into a poison vector. The assertions expect the
; instruction to become an insertelement constant expression used directly as
; the return operand.
define <vscale x 4 x i32> @insertelement_inline_to_ret() {
; CHECK-LABEL: @insertelement_inline_to_ret(
; CHECK-NEXT: ret <vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0)
;
%i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
ret <vscale x 4 x i32> %i
}
; Splat idiom: insert i32 1 into lane 0 of poison, then shuffle with an
; all-zero mask. The assertions expect both instructions to collapse into one
; shufflevector/insertelement constant expression in the return.
define <vscale x 4 x i32> @insertelement_shufflevector_inline_to_ret() {
; CHECK-LABEL: @insertelement_shufflevector_inline_to_ret(
; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
;
%i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
%i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %i2
}
; extractelement
; extractelement with an undef index. The assertions expect the result to
; fold to undef.
define i32 @extractelement_idx_undef(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_idx_undef(
; CHECK-NEXT: ret i32 undef
;
%r = extractelement <vscale x 4 x i32> %a, i64 undef
ret i32 %r
}
; extractelement from an undef vector at a constant index. The assertions
; expect the result to fold to undef. The %a parameter is not used.
define i32 @extractelement_vec_undef(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_vec_undef(
; CHECK-NEXT: ret i32 undef
;
%r = extractelement <vscale x 4 x i32> undef, i64 1
ret i32 %r
}
; extractelement at index 4 of a <vscale x 4 x i32>. The vector has
; 4 * vscale lanes, so index 4 is in range only when vscale is at least 2;
; the assertions expect the instruction to be kept as-is.
define i32 @extractelement_idx_maybe_out_of_bound(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_idx_maybe_out_of_bound(
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 4
; CHECK-NEXT: ret i32 [[R]]
;
%r = extractelement <vscale x 4 x i32> %a, i64 4
ret i32 %r
}
; extractelement at a large constant index (12345). The assertions expect the
; instruction to be left untouched.
define i32 @extractelement_idx_large_bound(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @extractelement_idx_large_bound(
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 12345
; CHECK-NEXT: ret i32 [[R]]
;
%r = extractelement <vscale x 4 x i32> %a, i64 12345
ret i32 %r
}
; Inserts i32 1 at index 4 of a poison vector and immediately extracts the
; same index. The assertions expect the pair to fold to the constant 1.
define i32 @insert_extract_element_same_vec_idx_2() {
; CHECK-LABEL: @insert_extract_element_same_vec_idx_2(
; CHECK-NEXT: ret i32 1
;
%v = insertelement <vscale x 4 x i32> poison, i32 1, i64 4
%r = extractelement <vscale x 4 x i32> %v, i64 4
ret i32 %r
}
; Same insert-then-extract pattern at index 4, but the insertelement is a
; constant expression (on an undef base vector) used directly as the
; extractelement operand. The assertions expect a fold to the constant 1.
define i32 @insert_extract_element_same_vec_idx_3() {
; CHECK-LABEL: @insert_extract_element_same_vec_idx_3(
; CHECK-NEXT: ret i32 1
;
%r = extractelement <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 4), i64 4
ret i32 %r
}
; Nested constant-expression inserts: the inner one writes 1 at index 4, the
; outer one writes 2 at index 3. Extracting index 4 is expected to look past
; the outer insert and fold to the constant 1.
define i32 @insert_extract_element_same_vec_idx_4() {
; CHECK-LABEL: @insert_extract_element_same_vec_idx_4(
; CHECK-NEXT: ret i32 1
;
%r = extractelement <vscale x 4 x i32> insertelement (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 4), i32 2, i64 3), i64 4
ret i32 %r
}
; more complicated expressions
; Signed less-or-equal compare of %x against a splat of 9223372036854775807,
; the largest signed i64 value. The assertions expect the compare to fold to a
; splat of i1 true.
define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) {
; CHECK-LABEL: @cmp_le_smax_always_true(
; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer)
;
%cmp = icmp sle <vscale x 2 x i64> %x, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9223372036854775807, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
ret <vscale x 2 x i1> %cmp
}
; Builds a splat of i32 1 (insertelement into poison + zero-mask shuffle) and
; bitcasts it to a float vector. The assertions expect all three instructions
; to collapse into one bitcast constant expression wrapping the splat.
define <vscale x 4 x float> @bitcast() {
; CHECK-LABEL: @bitcast(
; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>)
;
%i1 = insertelement <vscale x 4 x i32> poison, i32 1, i32 0
%i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%i3 = bitcast <vscale x 4 x i32> %i2 to <vscale x 4 x float>
ret <vscale x 4 x float> %i3
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Memory Access and Addressing Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; getelementptr
; GEP over a zeroinitializer vector of i32 pointers with an undef index
; vector. The assertions expect the result to fold to zeroinitializer.
define <vscale x 4 x i32*> @getelementptr_constant_foldable_1() {
; CHECK-LABEL: @getelementptr_constant_foldable_1(
; CHECK-NEXT: ret <vscale x 4 x i32*> zeroinitializer
;
%ptr = getelementptr i32, <vscale x 4 x i32*> zeroinitializer, <vscale x 4 x i64> undef
ret <vscale x 4 x i32*> %ptr
}
; GEP with a scalar null base pointer (pointee type is itself a scalable
; vector) and an undef vector of indices, which makes the result a vector of
; pointers. The assertions expect the result to fold to zeroinitializer.
define <vscale x 4 x <vscale x 4 x i32>*> @getelementptr_constant_foldable_2() {
; CHECK-LABEL: @getelementptr_constant_foldable_2(
; CHECK-NEXT: ret <vscale x 4 x <vscale x 4 x i32>*> zeroinitializer
;
%ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, <vscale x 4 x i64> undef
ret <vscale x 4 x <vscale x 4 x i32>*> %ptr
}
; fold getelementptr P, 0 -> P.
; Here P is a null pointer to a scalable vector, so the expected return value
; is null.
define <vscale x 4 x i32>* @getelementptr_constant_foldable_3() {
; CHECK-LABEL: @getelementptr_constant_foldable_3(
; CHECK-NEXT: ret <vscale x 4 x i32>* null
;
%ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, i64 0
ret <vscale x 4 x i32>* %ptr
}
; GEP from null with a runtime index %x over a scalable-vector element type.
; The assertions expect the instruction to be kept unchanged.
define <vscale x 4 x i32>* @getelementptr_not_constant_foldable(i64 %x) {
; CHECK-LABEL: @getelementptr_not_constant_foldable(
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, i64 [[X:%.*]]
; CHECK-NEXT: ret <vscale x 4 x i32>* [[PTR]]
;
%ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, i64 %x
ret <vscale x 4 x i32>* %ptr
}
; Check GEP's result is known to be non-null.
; An inbounds GEP one scalable-vector element past %ptr is compared for
; equality with null; the assertions expect the compare to fold to false.
define i1 @getelementptr_check_non_null(<vscale x 16 x i8>* %ptr) {
; CHECK-LABEL: @getelementptr_check_non_null(
; CHECK-NEXT: ret i1 false
;
%x = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr, i32 1
%cmp = icmp eq <vscale x 16 x i8>* %x, null
ret i1 %cmp
}

View File

@ -0,0 +1,95 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -dce -S -o - %s | FileCheck %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
; Three i32 loads whose addresses are selects on the same condition %cnd: each
; select picks element 0, 1 or 2 relative to %a (true arm) or to %b (false
; arm). The loaded values are packed into a <3 x i32> and stored to %out.
; The test expects a <3 x i32> load to appear in the output.
define void @base_case(i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b, <3 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @base_case
; CHECK: load <3 x i32>
entry:
; Element 1 and element 2 addresses for each of the two base pointers.
%gep1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 1
%gep2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 2
%gep4 = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 1
%gep5 = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 2
; Per-element address choice, all keyed on the same condition.
%selected = select i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b
%selected14 = select i1 %cnd, i32 addrspace(1)* %gep1, i32 addrspace(1)* %gep4
%selected25 = select i1 %cnd, i32 addrspace(1)* %gep2, i32 addrspace(1)* %gep5
%val0 = load i32, i32 addrspace(1)* %selected, align 4
%val1 = load i32, i32 addrspace(1)* %selected14, align 4
%val2 = load i32, i32 addrspace(1)* %selected25, align 4
%t0 = insertelement <3 x i32> poison, i32 %val0, i32 0
%t1 = insertelement <3 x i32> %t0, i32 %val1, i32 1
%t2 = insertelement <3 x i32> %t1, i32 %val2, i32 2
store <3 x i32> %t2, <3 x i32> addrspace(1)* %out
ret void
}
; Two i32 loads reached through selects on %cnd, with the addresses built from
; a mix of shl/mul, zext, i8 GEPs, bitcasts and i32 GEPs. Reading the
; arithmetic below (and ignoring any wraparound), the loads are at byte
; offsets 16*base+40 and 16*base+44 from %a when %cnd is true, and at
; 16*base+44 and 16*base+48 from %b when it is false -- 4 bytes apart either
; way. The test expects a <2 x i32> load to appear in the output.
define void @scev_targeting_complex_case(i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %base, <2 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @scev_targeting_complex_case
; CHECK: load <2 x i32>
entry:
; 32-bit element indices 4*base + {0, 1, 2, 3}.
%base.x4 = shl i32 %base, 2
%base.x4.p1 = add i32 %base.x4, 1
%base.x4.p2 = add i32 %base.x4, 2
%base.x4.p3 = add i32 %base.x4, 3
%zext.x4 = zext i32 %base.x4 to i64
%zext.x4.p1 = zext i32 %base.x4.p1 to i64
%zext.x4.p2 = zext i32 %base.x4.p2 to i64
%zext.x4.p3 = zext i32 %base.x4.p3 to i64
; Scale the indices by 4 to get byte offsets; mul and shl are both used.
%base.x16 = mul i64 %zext.x4, 4
%base.x16.p4 = shl i64 %zext.x4.p1, 2
%base.x16.p8 = shl i64 %zext.x4.p2, 2
%base.x16.p12 = mul i64 %zext.x4.p3, 4
%a.pi8 = bitcast i32 addrspace(1)* %a to i8 addrspace(1)*
%b.pi8 = bitcast i32 addrspace(1)* %b to i8 addrspace(1)*
%gep.a.base.x16 = getelementptr inbounds i8, i8 addrspace(1)* %a.pi8, i64 %base.x16
%gep.b.base.x16.p4 = getelementptr inbounds i8, i8 addrspace(1)* %b.pi8, i64 %base.x16.p4
%gep.a.base.x16.p8 = getelementptr inbounds i8, i8 addrspace(1)* %a.pi8, i64 %base.x16.p8
%gep.b.base.x16.p12 = getelementptr inbounds i8, i8 addrspace(1)* %b.pi8, i64 %base.x16.p12
%a.base.x16 = bitcast i8 addrspace(1)* %gep.a.base.x16 to i32 addrspace(1)*
%b.base.x16.p4 = bitcast i8 addrspace(1)* %gep.b.base.x16.p4 to i32 addrspace(1)*
; The first select operates on i32 pointers, the second on i8 pointers that
; are bitcast afterwards.
%selected.base.x16.p0.or.4 = select i1 %cnd, i32 addrspace(1)* %a.base.x16, i32 addrspace(1)* %b.base.x16.p4
%gep.selected.base.x16.p8.or.12 = select i1 %cnd, i8 addrspace(1)* %gep.a.base.x16.p8, i8 addrspace(1)* %gep.b.base.x16.p12
%selected.base.x16.p8.or.12 = bitcast i8 addrspace(1)* %gep.selected.base.x16.p8.or.12 to i32 addrspace(1)*
; Further constant i32 GEPs applied after the selects (+10 and +9 elements).
%selected.base.x16.p40.or.44 = getelementptr inbounds i32, i32 addrspace(1)* %selected.base.x16.p0.or.4, i64 10
%selected.base.x16.p44.or.48 = getelementptr inbounds i32, i32 addrspace(1)* %selected.base.x16.p8.or.12, i64 9
%val0 = load i32, i32 addrspace(1)* %selected.base.x16.p40.or.44, align 4
%val1 = load i32, i32 addrspace(1)* %selected.base.x16.p44.or.48, align 4
%t0 = insertelement <2 x i32> poison, i32 %val0, i32 0
%t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
store <2 x i32> %t1, <2 x i32> addrspace(1)* %out
ret void
}
; Two i32 loads whose addresses come from two levels of select. %cnd0 picks
; between a GEP at index base (resp. base+1) and an inner select on %cnd1,
; which in turn picks between index base+2 (resp. base+3) and index 4*base+5
; (resp. 4*base+6). The test expects a <2 x i32> load to appear in the output.
; NOTE(review): %gep.b.base.x4.p5 and %gep.b.base.x4.p6 are named after %b but
; are computed from %a, and %b itself is never used -- confirm this is
; intended.
define void @nested_selects(i1 %cnd0, i1 %cnd1, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %base, <2 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @nested_selects
; CHECK: load <2 x i32>
entry:
; Only the +1 and +3 adds carry nsw; the +2 add and the 4*base adds do not.
%base.p1 = add nsw i32 %base, 1
%base.p2 = add i32 %base, 2
%base.p3 = add nsw i32 %base, 3
%base.x4 = mul i32 %base, 4
%base.x4.p5 = add i32 %base.x4, 5
%base.x4.p6 = add i32 %base.x4, 6
%sext = sext i32 %base to i64
%sext.p1 = sext i32 %base.p1 to i64
%sext.p2 = sext i32 %base.p2 to i64
%sext.p3 = sext i32 %base.p3 to i64
%sext.x4.p5 = sext i32 %base.x4.p5 to i64
%sext.x4.p6 = sext i32 %base.x4.p6 to i64
%gep.a.base = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext
%gep.a.base.p1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p1
%gep.a.base.p2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p2
%gep.a.base.p3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p3
%gep.b.base.x4.p5 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.x4.p5
%gep.b.base.x4.p6 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.x4.p6
; Inner selects on %cnd1 feed the false arm of the outer selects on %cnd0.
%selected.1.L = select i1 %cnd1, i32 addrspace(1)* %gep.a.base.p2, i32 addrspace(1)* %gep.b.base.x4.p5
%selected.1.R = select i1 %cnd1, i32 addrspace(1)* %gep.a.base.p3, i32 addrspace(1)* %gep.b.base.x4.p6
%selected.0.L = select i1 %cnd0, i32 addrspace(1)* %gep.a.base, i32 addrspace(1)* %selected.1.L
%selected.0.R = select i1 %cnd0, i32 addrspace(1)* %gep.a.base.p1, i32 addrspace(1)* %selected.1.R
%val0 = load i32, i32 addrspace(1)* %selected.0.L, align 4
%val1 = load i32, i32 addrspace(1)* %selected.0.R, align 4
%t0 = insertelement <2 x i32> poison, i32 %val0, i32 0
%t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
store <2 x i32> %t1, <2 x i32> addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,40 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
; Loads four adjacent <2 x double> vectors starting at %ptr, takes them apart
; element by element and rebuilds a single <8 x double> from the scalars.
; The haswell runs expect two <4 x double> loads and no load after them; the
; knl runs expect one <8 x double> load and no load after it.
define <8 x double> @loadwidth_insert_extract(double* %ptr) {
%a = bitcast double* %ptr to <2 x double> *
%b = getelementptr <2 x double>, <2 x double>* %a, i32 1
%c = getelementptr <2 x double>, <2 x double>* %a, i32 2
%d = getelementptr <2 x double>, <2 x double>* %a, i32 3
; CHECK-HSW: load <4 x double>
; CHECK-HSW: load <4 x double>
; CHECK-HSW-NOT: load
; CHECK-KNL: load <8 x double>
; CHECK-KNL-NOT: load
%la = load <2 x double>, <2 x double> *%a
%lb = load <2 x double>, <2 x double> *%b
%lc = load <2 x double>, <2 x double> *%c
%ld = load <2 x double>, <2 x double> *%d
; Scalarize everything - Explicitly not a shufflevector to test this code
; path in the LSV
%v1 = extractelement <2 x double> %la, i32 0
%v2 = extractelement <2 x double> %la, i32 1
%v3 = extractelement <2 x double> %lb, i32 0
%v4 = extractelement <2 x double> %lb, i32 1
%v5 = extractelement <2 x double> %lc, i32 0
%v6 = extractelement <2 x double> %lc, i32 1
%v7 = extractelement <2 x double> %ld, i32 0
%v8 = extractelement <2 x double> %ld, i32 1
; Make a vector again
%i1 = insertelement <8 x double> poison, double %v1, i32 0
%i2 = insertelement <8 x double> %i1, double %v2, i32 1
%i3 = insertelement <8 x double> %i2, double %v3, i32 2
%i4 = insertelement <8 x double> %i3, double %v4, i32 3
%i5 = insertelement <8 x double> %i4, double %v5, i32 4
%i6 = insertelement <8 x double> %i5, double %v6, i32 5
%i7 = insertelement <8 x double> %i6, double %v7, i32 6
%i8 = insertelement <8 x double> %i7, double %v8, i32 7
ret <8 x double> %i8
}

View File

@ -0,0 +1,165 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -o - -S -load-store-vectorizer -dce %s | FileCheck %s
; Make sure LoadStoreVectorizer vectorizes the loads below.
; In order to prove that the vectorization is safe, it tries to
; match nested adds and find an expression that adds a constant
; value to an existing index and the result doesn't overflow.
target triple = "x86_64--"
; Four i8 loads from %src at indices %v1 + %v0 + {-1, 0, 1, 2}. Every add
; carries nsw and each sum is sign-extended to i64 before the GEP. The
; assertions expect the four scalar loads to be replaced by one <4 x i8> load
; followed by four extractelements.
define void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, i8* %src, <4 x i8>* %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT: [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
bb:
; Index %v1 + (%v0 - 1).
%tmp = add nsw i32 %v0, -1
%tmp1 = add nsw i32 %v1, %tmp
%tmp2 = sext i32 %tmp1 to i64
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
%tmp4 = load i8, i8* %tmp3, align 1
; Index %v1 + %v0.
%tmp5 = add nsw i32 %v1, %v0
%tmp6 = sext i32 %tmp5 to i64
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
%tmp8 = load i8, i8* %tmp7, align 1
; Index %v1 + (%v0 + 1).
%tmp9 = add nsw i32 %v0, 1
%tmp10 = add nsw i32 %v1, %tmp9
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
%tmp13 = load i8, i8* %tmp12, align 1
; Index %v1 + (%v0 + 2).
%tmp14 = add nsw i32 %v0, 2
%tmp15 = add nsw i32 %v1, %tmp14
%tmp16 = sext i32 %tmp15 to i64
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
%tmp18 = load i8, i8* %tmp17, align 1
%tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
store <4 x i8> %tmp22, <4 x i8>* %dst
ret void
}
; Unsigned counterpart of the nsw case: four i8 loads from %src at indices
; %v1 + %v0 + {-1, 0, 1, 2}, where every add carries nuw and each sum is
; zero-extended to i64 before the GEP. The assertions expect the four scalar
; loads to be replaced by one <4 x i8> load followed by four extractelements.
define void @ld_v4i8_add_nuw(i32 %v0, i32 %v1, i8* %src, <4 x i8>* %dst) {
; CHECK-LABEL: @ld_v4i8_add_nuw(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = add nuw i32 [[V0:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT: [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP41]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
bb:
; Index %v1 + (%v0 - 1).
%tmp = add nuw i32 %v0, -1
%tmp1 = add nuw i32 %v1, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
%tmp4 = load i8, i8* %tmp3, align 1
; Index %v1 + %v0.
%tmp5 = add nuw i32 %v1, %v0
%tmp6 = zext i32 %tmp5 to i64
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
%tmp8 = load i8, i8* %tmp7, align 1
; Index %v1 + (%v0 + 1).
%tmp9 = add nuw i32 %v0, 1
%tmp10 = add nuw i32 %v1, %tmp9
%tmp11 = zext i32 %tmp10 to i64
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
%tmp13 = load i8, i8* %tmp12, align 1
; Index %v1 + (%v0 + 2).
%tmp14 = add nuw i32 %v0, 2
%tmp15 = add nuw i32 %v1, %tmp14
%tmp16 = zext i32 %tmp15 to i64
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
%tmp18 = load i8, i8* %tmp17, align 1
%tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
store <4 x i8> %tmp22, <4 x i8>* %dst
ret void
}
; Make sure we don't vectorize the loads below: the adds that feed the sext
; instructions (%tmp1, %tmp5, %tmp10, %tmp15) don't have the nsw flag, even
; though the inner adds on %v0 do. The assertions expect all four scalar i8
; loads to remain in the output.
define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, i8* %src, <4 x i8>* %dst) {
; CHECK-LABEL: @ld_v4i8_add_not_safe(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = add nsw i32 [[V0]], 1
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[V0]], 2
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
bb:
; Index %v1 + (%v0 - 1); the outer add has no wrap flag.
%tmp = add nsw i32 %v0, -1
%tmp1 = add i32 %v1, %tmp
%tmp2 = sext i32 %tmp1 to i64
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
%tmp4 = load i8, i8* %tmp3, align 1
; Index %v1 + %v0; no wrap flag.
%tmp5 = add i32 %v1, %v0
%tmp6 = sext i32 %tmp5 to i64
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
%tmp8 = load i8, i8* %tmp7, align 1
; Index %v1 + (%v0 + 1); the outer add has no wrap flag.
%tmp9 = add nsw i32 %v0, 1
%tmp10 = add i32 %v1, %tmp9
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
%tmp13 = load i8, i8* %tmp12, align 1
; Index %v1 + (%v0 + 2); the outer add has no wrap flag.
%tmp14 = add nsw i32 %v0, 2
%tmp15 = add i32 %v1, %tmp14
%tmp16 = sext i32 %tmp15 to i64
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
%tmp18 = load i8, i8* %tmp17, align 1
%tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
store <4 x i8> %tmp22, <4 x i8>* %dst
ret void
}

View File

@ -0,0 +1,257 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve %s -S -loop-reduce -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m-arm-none-eabi"
; vctp8: predicated gather-and-accumulate loop written with Arm MVE intrinsics,
; used as input for the -loop-reduce invocation declared at the top of this file.
;   %0 - float pointer; converted to i32 and used to seed the gather operand
;   %1 - i32 count; the loop counter starts at %1 - 1
; Returns fabs of the int-to-float converted result of @vecAddAcrossF32Mve
; applied to the accumulated <4 x float>.
; The expected output below matches the input IR except at the end of the loop
; block: the counter decrement is expected after the compare and without nsw.
define float @vctp8(float* %0, i32 %1) {
; CHECK-LABEL: @vctp8(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 [[TMP12]])
; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
; Entry block (implicitly numbered %2): build the initial <4 x i32> gather operand %10.
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
; Lane 0 becomes ptrtoint(%0) - 32; the all-zero shuffle mask then copies lane 0 into every lane.
%7 = insertelement <4 x i32> poison, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
; Loop block: %12 = counter, %13 = running <4 x float> accumulator, %14 = gather operand.
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
; The <16 x i1> vctp8 result is passed through the externally declared
; @v16i1_to_v4i1 to obtain the <4 x i1> predicate used by the gather and the add.
%15 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %12)
%mask = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> %15)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
; Element 1 of the gather result feeds next iteration's %14; element 0 is the loaded <4 x float>.
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
; %13 is passed both as the first addend and as the last operand
; (presumably the value kept for inactive lanes - confirm against the MVE intrinsic definition).
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
; Counter steps down by 4; the loop repeats while the pre-decrement value is > 4.
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
; Exit block: hand the accumulator to @vecAddAcrossF32Mve (declared variadic, hence the bitcast).
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
; vctp16: predicated gather-and-accumulate loop written with Arm MVE intrinsics.
; The IR is the same as @vctp8 in this file except for the predicate: it comes
; from @llvm.arm.mve.vctp16 (<8 x i1>) and is passed through @v8i1_to_v4i1.
;   %0 - float pointer; converted to i32 and used to seed the gather operand
;   %1 - i32 count; the loop counter starts at %1 - 1
; Returns fabs of the int-to-float converted result of @vecAddAcrossF32Mve
; applied to the accumulated <4 x float>.
; The expected output below matches the input IR except at the end of the loop
; block: the counter decrement is expected after the compare and without nsw.
define float @vctp16(float* %0, i32 %1) {
; CHECK-LABEL: @vctp16(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP12]])
; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
; Entry block (implicitly numbered %2): build the initial <4 x i32> gather operand %10.
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
; Lane 0 becomes ptrtoint(%0) - 32; the all-zero shuffle mask then copies lane 0 into every lane.
%7 = insertelement <4 x i32> poison, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
; Loop block: %12 = counter, %13 = running <4 x float> accumulator, %14 = gather operand.
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
; The <8 x i1> vctp16 result is passed through the externally declared
; @v8i1_to_v4i1 to obtain the <4 x i1> predicate used by the gather and the add.
%15 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %12)
%mask = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> %15)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
; Element 1 of the gather result feeds next iteration's %14; element 0 is the loaded <4 x float>.
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
; %13 is passed both as the first addend and as the last operand
; (presumably the value kept for inactive lanes - confirm against the MVE intrinsic definition).
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
; Counter steps down by 4; the loop repeats while the pre-decrement value is > 4.
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
; Exit block: hand the accumulator to @vecAddAcrossF32Mve (declared variadic, hence the bitcast).
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
; vctpi32: predicated gather-and-accumulate loop written with Arm MVE intrinsics.
; Unlike @vctp8 and @vctp16 in this file, the <4 x i1> result of
; @llvm.arm.mve.vctp32 is used directly as the predicate, with no conversion call.
;   %0 - float pointer; converted to i32 and used to seed the gather operand
;   %1 - i32 count; the loop counter starts at %1 - 1
; Returns fabs of the int-to-float converted result of @vecAddAcrossF32Mve
; applied to the accumulated <4 x float>.
; The expected output below matches the input IR except at the end of the loop
; block: the counter decrement is expected after the compare and without nsw.
define float @vctpi32(float* %0, i32 %1) {
; CHECK-LABEL: @vctpi32(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP12]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[TMP15]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
; Entry block (implicitly numbered %2): build the initial <4 x i32> gather operand %10.
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
; Lane 0 becomes ptrtoint(%0) - 32; the all-zero shuffle mask then copies lane 0 into every lane.
%7 = insertelement <4 x i32> poison, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
; Loop block: %12 = counter, %13 = running <4 x float> accumulator, %14 = gather operand.
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
; The <4 x i1> vctp32 result %15 is the predicate for both the gather and the add.
%15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
; Element 1 of the gather result feeds next iteration's %14; element 0 is the loaded <4 x float>.
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
; %13 is passed both as the first addend and as the last operand
; (presumably the value kept for inactive lanes - confirm against the MVE intrinsic definition).
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
; Counter steps down by 4; the loop repeats while the pre-decrement value is > 4.
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
; Exit block: hand the accumulator to @vecAddAcrossF32Mve (declared variadic, hence the bitcast).
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
; vctpi64: predicated gather-and-accumulate loop written with Arm MVE intrinsics.
; Same IR as @vctpi32 in this file, but the predicate comes from
; @llvm.arm.mve.vctp64, which is declared in this file as returning <4 x i1>
; and is used directly with no conversion call.
;   %0 - float pointer; converted to i32 and used to seed the gather operand
;   %1 - i32 count; the loop counter starts at %1 - 1
; Returns fabs of the int-to-float converted result of @vecAddAcrossF32Mve
; applied to the accumulated <4 x float>.
; The expected output below matches the input IR except at the end of the loop
; block: the counter decrement is expected after the compare and without nsw.
define float @vctpi64(float* %0, i32 %1) {
; CHECK-LABEL: @vctpi64(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <4 x i1> @llvm.arm.mve.vctp64(i32 [[TMP12]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[TMP15]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
; Entry block (implicitly numbered %2): build the initial <4 x i32> gather operand %10.
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
; Lane 0 becomes ptrtoint(%0) - 32; the all-zero shuffle mask then copies lane 0 into every lane.
%7 = insertelement <4 x i32> poison, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
; Loop block: %12 = counter, %13 = running <4 x float> accumulator, %14 = gather operand.
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
; The <4 x i1> vctp64 result %15 is the predicate for both the gather and the add.
%15 = tail call <4 x i1> @llvm.arm.mve.vctp64(i32 %12)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
; Element 1 of the gather result feeds next iteration's %14; element 0 is the loaded <4 x float>.
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
; %13 is passed both as the first addend and as the last operand
; (presumably the value kept for inactive lanes - confirm against the MVE intrinsic definition).
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
; Counter steps down by 4; the loop repeats while the pre-decrement value is > 4.
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
; Exit block: hand the accumulator to @vecAddAcrossF32Mve (declared variadic, hence the bitcast).
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
; External declarations referenced by @vctp8, @vctp16, @vctpi32 and @vctpi64.
; Names starting with llvm. are intrinsics; @vecAddAcrossF32Mve, @v8i1_to_v4i1
; and @v16i1_to_v4i1 are ordinary functions that are only declared here.
declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
; Declared variadic; every call site bitcasts it to i32 (<4 x float>)* before calling.
declare i32 @vecAddAcrossF32Mve(...)
declare <4 x i1> @v8i1_to_v4i1(<8 x i1>)
declare <4 x i1> @v16i1_to_v4i1(<16 x i1>)
declare float @llvm.fabs.f32(float)

View File

@ -0,0 +1,256 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -loop-unroll | FileCheck %s
; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -loop-unroll | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; f: writes the low %k bits of %x into %s as ASCII characters, least significant
; bit first, then stores a 0 byte at index %k and returns %s.
;   %s - destination byte buffer (also the return value)
;   %x - value whose bits are tested
;   %k - number of bits/characters to write; both loops are skipped when %k <= 0
; The input already has two loops: a 16-lane vector loop (taken when %k >= 16)
; and a scalar loop for the remaining iterations.
; Expected output after -loop-unroll, shared by both the pwr8 and pwr9 invocations:
;   * vector.body holds two copies of the vector loop body, with a single
;     remainder copy in vector.body.epil that runs when XTRAITER1 (TMP2 & 1) != 0;
;   * for.body holds eight copies of the scalar loop body, preceded by the
;     for.body.prol loop that runs XTRAITER (TMP23 & 7) iterations.
; Function Attrs: norecurse nounwind
define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[K:%.*]], 0
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[K]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; CHECK: vector.ph.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12]]
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1
; CHECK-NEXT: [[TMP10:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NSUB_1]] = sub i64 [[NITER_NSUB]], 1
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NSUB_1]], 0
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: middle.block.unr-lcssa.loopexit:
; CHECK-NEXT: [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ]
; CHECK-NEXT: br label [[MIDDLE_BLOCK_UNR_LCSSA]]
; CHECK: middle.block.unr-lcssa:
; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: vector.body.epil.preheader:
; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]]
; CHECK: vector.body.epil:
; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_EPIL]]
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]]
; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]]
; CHECK: middle.block.epilog-lcssa:
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
; CHECK: for.body.prol.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]]
; CHECK: for.body.prol:
; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]]
; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_PROL]]
; CHECK-NEXT: store i8 [[CONV_PROL]], i8* [[ARRAYIDX_PROL]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]]
; CHECK: for.body.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]]
; CHECK: for.body.prol.loopexit:
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7
; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK: for.body.preheader.new:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]]
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]]
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]]
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]]
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]]
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]]
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]]
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: store i8 [[CONV_7]], i8* [[ARRAYIDX_7]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; CHECK-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit.unr-lcssa:
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[K]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[IDXPROM1]]
; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret i8* [[S]]
;
; Skip both loops when %k <= 0.
entry:
%cmp10 = icmp sgt i32 %k, 0
br i1 %cmp10, label %for.body.lr.ph, label %for.end
; Fewer than 16 iterations: go straight to the scalar loop.
for.body.lr.ph: ; preds = %entry
%wide.trip.count = zext i32 %k to i64
%min.iters.check = icmp ult i32 %k, 16
br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
; %n.vec is the trip count rounded down to a multiple of 16 (4294967280 = 0xFFFFFFF0);
; %x is copied into all 16 lanes of %broadcast.splat.
vector.ph: ; preds = %for.body.lr.ph
%n.vec = and i64 %wide.trip.count, 4294967280
%broadcast.splatinsert = insertelement <16 x i32> poison, i32 %x, i32 0
%broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
br label %vector.body
; Each iteration stores 16 bytes at %s + %index: a lane gets 48 ('0') when the bit
; of %x selected by that lane of %vec.ind12 is clear, otherwise 49 ('1').
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind12 = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, %vector.ph ], [ %vec.ind.next13, %vector.body ]
%0 = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %vec.ind12
%1 = and <16 x i32> %0, %broadcast.splat
%2 = icmp eq <16 x i32> %1, zeroinitializer
%3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
%4 = getelementptr inbounds i8, i8* %s, i64 %index
%5 = bitcast i8* %4 to <16 x i8>*
store <16 x i8> %3, <16 x i8>* %5, align 1
%index.next = add i64 %index, 16
%vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%6 = icmp eq i64 %index.next, %n.vec
br i1 %6, label %middle.block, label %vector.body
; If the vector loop covered every iteration, skip the scalar loop.
middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
br i1 %cmp.n, label %for.end, label %for.body.preheader
; The scalar loop starts at 0 (vector loop not taken) or at %n.vec.
for.body.preheader: ; preds = %middle.block, %for.body.lr.ph
%indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %n.vec, %middle.block ]
br label %for.body
; Scalar form of the same per-element computation: one byte stored per iteration.
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
%7 = trunc i64 %indvars.iv to i32
%shl = shl i32 1, %7
%and = and i32 %shl, %x
%tobool = icmp eq i32 %and, 0
%conv = select i1 %tobool, i8 48, i8 49
%arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
store i8 %conv, i8* %arrayidx, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.end, label %for.body
; Store a 0 byte at index %k (sign-extended) and return %s.
for.end: ; preds = %for.body, %middle.block, %entry
%idxprom1 = sext i32 %k to i64
%arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
store i8 0, i8* %arrayidx2, align 1
ret i8* %s
}

View File

@ -0,0 +1,43 @@
; XFAIL: *
; RUN: opt < %s -basic-aa -newgvn -S | FileCheck %s
; Intrinsic declarations used by @test. Per the LangRef, the scatter operands are
; (values, pointer vector, alignment, mask) and the gather operands are
; (pointer vector, alignment, mask, passthru).
declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
; This test ensures that masked scatter and gather operations, which take vectors of pointers,
; do not have pointer aliasing ignored when being processed.
; No scatter/gather calls should end up eliminated
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.scatter
; CHECK: llvm.masked.gather
; CHECK: llvm.masked.scatter
; CHECK: llvm.masked.gather
; Gathers from %in1 and %in2, scatters each result into two allocas through the
; pointer vector %tmp, re-gathers from %tmp after each scatter, and finally
; writes both lanes of the last gather to %out[0] and %out[1].
; NOTE(review): the gather calls reference attribute group #1, but no
; `attributes #1` definition is visible in this file - confirm this is intended.
define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) {
entry:
; Just some temporary storage
%tmp.0 = alloca i32
%tmp.1 = alloca i32
; Pack the two alloca addresses into a 2-lane pointer vector, starting from a
; poison placeholder (lane 0 = %tmp.0, lane 1 = %tmp.1).
%tmp.i = insertelement <2 x i32*> poison, i32* %tmp.0, i32 0
%tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
; Read from in1 and in2
%in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
%in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in1 to the allocas
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
; Read in1 from the allocas
; This gather should alias the scatter we just saw
; (%tmp.v.0 itself has no further uses in this function.)
%tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in2 to the allocas
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
; Read in2 from the allocas
; This gather should alias the scatter we just saw, and not be eliminated
%tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
; Store in2 to out for good measure
%tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
%tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
store i32 %tmp.v.1.0, i32* %out
%out.1 = getelementptr i32, i32* %out, i32 1
store i32 %tmp.v.1.1, i32* %out.1
ret void
}

View File

@ -0,0 +1,165 @@
; TEST that counter updates are promoted outside the whole loop nest
; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO %s
; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO %s
; @g is the i32 counter incremented by @bar and zeroed at the start of @main;
; @c (initial value 10) is loaded by @main to derive its loop bounds.
@g = common local_unnamed_addr global i32 0, align 4
@c = local_unnamed_addr global i32 10, align 4
; Function Attrs: noinline norecurse nounwind uwtable
; Increments the global @g by one: load, add nsw 1, store.
define void @bar() local_unnamed_addr #0 {
bb:
%tmp2 = load i32, i32* @g, align 4, !tbaa !2
%tmp3 = add nsw i32 %tmp2, 1
store i32 %tmp3, i32* @g, align 4, !tbaa !2
ret void
}
; Function Attrs: norecurse nounwind uwtable
; Zeroes @g, then runs three loop structures that each call @bar from their
; innermost loop:
;   1. bb2 / bb14 / bb20 - a triple nest (outer, middle, inner)
;   2. bb45 / bb57       - a double nest
;   3. bb73              - a single loop
; The innermost loops re-load @c on every iteration for their exit test; the
; second and third scale the loaded value by 10 and 100 respectively.
; Always returns 0.
define i32 @main() local_unnamed_addr #1 {
bb:
store i32 0, i32* @g, align 4, !tbaa !2
%tmp = load i32, i32* @c, align 4, !tbaa !2
%tmp1 = icmp sgt i32 %tmp, 0
br i1 %tmp1, label %bb2_1, label %bb84
bb2_1: ; preds = %bb
br label %bb2
bb2: ; preds = %bb39, %bb2_1
%tmp3 = phi i32 [ %tmp40, %bb39 ], [ %tmp, %bb2_1 ]
%tmp5 = phi i32 [ %tmp43, %bb39 ], [ 0, %bb2_1 ]
%tmp7 = icmp sgt i32 %tmp3, 0
br i1 %tmp7, label %bb14_1, label %bb39
bb8: ; preds = %bb39
; Exit block of the first (triple) loop nest. The directives below expect five
; consecutive load/add/store updates of @__profc_main counters in this block.
; PROMO-LABEL: bb8
; PROMO: load {{.*}} @__profc_main{{.*}}
; PROMO-NEXT: add
; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
; PROMO-NEXT: add
; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
; PROMO-NEXT: add
; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
; PROMO-NEXT: add
; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
; PROMO-NEXT: add
; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
%tmp13 = icmp sgt i32 %tmp40, 0
br i1 %tmp13, label %bb45, label %bb84
bb14_1: ; preds = %bb2
br label %bb14
bb14: ; preds = %bb29, %bb14_1
%tmp15 = phi i32 [ %tmp30, %bb29 ], [ %tmp3, %bb14_1 ]
%tmp16 = phi i64 [ %tmp31, %bb29 ], [ 0, %bb14_1 ]
%tmp17 = phi i64 [ %tmp32, %bb29 ], [ 0, %bb14_1 ]
%tmp18 = phi i32 [ %tmp33, %bb29 ], [ 0, %bb14_1 ]
%tmp19 = icmp sgt i32 %tmp15, 0
br i1 %tmp19, label %bb20_split, label %bb29
bb20_split: ; preds = %bb14
br label %bb20
bb20: ; preds = %bb20, %bb20_split
%tmp21 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb20_split ]
%tmp22 = phi i32 [ %tmp24, %bb20 ], [ 0, %bb20_split ]
%tmp23 = add nuw i64 %tmp21, 1
tail call void @bar()
%tmp24 = add nuw nsw i32 %tmp22, 1
%tmp25 = load i32, i32* @c, align 4, !tbaa !2
%tmp26 = icmp slt i32 %tmp24, %tmp25
br i1 %tmp26, label %bb20, label %bb27
bb27: ; preds = %bb20
%tmp28 = add i64 %tmp23, %tmp16
br label %bb29
bb29: ; preds = %bb27, %bb14
%tmp30 = phi i32 [ %tmp25, %bb27 ], [ %tmp15, %bb14 ]
%tmp31 = phi i64 [ %tmp28, %bb27 ], [ %tmp16, %bb14 ]
%tmp32 = add nuw i64 %tmp17, 1
%tmp33 = add nuw nsw i32 %tmp18, 1
%tmp34 = icmp slt i32 %tmp33, %tmp30
br i1 %tmp34, label %bb14, label %bb35
bb35: ; preds = %bb29
; %tmp36 has no uses in this function; it is the insertelement whose
; placeholder operand is poison in this variant of the test.
%tmp36 = insertelement <2 x i64> poison, i64 %tmp31, i32 0
br label %bb39
bb39: ; preds = %bb35, %bb2
%tmp40 = phi i32 [ %tmp30, %bb35 ], [ %tmp3, %bb2 ]
%tmp43 = add nuw nsw i32 %tmp5, 1
%tmp44 = icmp slt i32 %tmp43, %tmp40
br i1 %tmp44, label %bb2, label %bb8
bb45: ; preds = %bb67, %bb8
%tmp46 = phi i32 [ %tmp68, %bb67 ], [ %tmp40, %bb8 ]
%tmp47 = phi i64 [ %tmp69, %bb67 ], [ 0, %bb8 ]
%tmp48 = phi i64 [ %tmp70, %bb67 ], [ 0, %bb8 ]
%tmp49 = phi i32 [ %tmp71, %bb67 ], [ 0, %bb8 ]
%tmp50 = icmp sgt i32 %tmp46, 0
br i1 %tmp50, label %bb57, label %bb67
bb51: ; preds = %bb67
%tmp56 = icmp sgt i32 %tmp68, 0
br i1 %tmp56, label %bb73, label %bb84
bb57: ; preds = %bb57, %bb45
%tmp58 = phi i64 [ %tmp60, %bb57 ], [ 0, %bb45 ]
%tmp59 = phi i32 [ %tmp61, %bb57 ], [ 0, %bb45 ]
%tmp60 = add nuw i64 %tmp58, 1
tail call void @bar()
%tmp61 = add nuw nsw i32 %tmp59, 1
%tmp62 = load i32, i32* @c, align 4, !tbaa !2
%tmp63 = mul nsw i32 %tmp62, 10
%tmp64 = icmp slt i32 %tmp61, %tmp63
br i1 %tmp64, label %bb57, label %bb65
bb65: ; preds = %bb57
%tmp66 = add i64 %tmp60, %tmp47
br label %bb67
bb67: ; preds = %bb65, %bb45
%tmp68 = phi i32 [ %tmp62, %bb65 ], [ %tmp46, %bb45 ]
%tmp69 = phi i64 [ %tmp66, %bb65 ], [ %tmp47, %bb45 ]
%tmp70 = add nuw i64 %tmp48, 1
%tmp71 = add nuw nsw i32 %tmp49, 1
%tmp72 = icmp slt i32 %tmp71, %tmp68
br i1 %tmp72, label %bb45, label %bb51
bb73: ; preds = %bb73, %bb51
%tmp74 = phi i64 [ %tmp76, %bb73 ], [ 0, %bb51 ]
%tmp75 = phi i32 [ %tmp77, %bb73 ], [ 0, %bb51 ]
%tmp76 = add nuw i64 %tmp74, 1
tail call void @bar()
%tmp77 = add nuw nsw i32 %tmp75, 1
%tmp78 = load i32, i32* @c, align 4, !tbaa !2
%tmp79 = mul nsw i32 %tmp78, 100
%tmp80 = icmp slt i32 %tmp77, %tmp79
br i1 %tmp80, label %bb73, label %bb81
bb81: ; preds = %bb73
br label %bb84
bb84: ; preds = %bb81, %bb51, %bb8, %bb
ret i32 0
}
; Attribute groups: #0 is attached to @bar, #1 to @main.
attributes #0 = { noinline }
attributes #1 = { norecurse nounwind uwtable }
; Module flags and producer identification.
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 307355)"}
; TBAA chain referenced as !tbaa !2 on the i32 loads and stores:
; !2 (access tag) -> !3 "int" -> !4 "omnipotent char" -> !5 root.
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}

View File

@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S | FileCheck %s
; RUN: opt < %s -passes='default<O3>' -aa-pipeline=default -S | FileCheck %s
target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
; That may require some coordination between VectorCombine, SLP, and other passes.
; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
; Builds the result one lane at a time from scalar ops:
; lanes 0 and 2 are %arg - %arg1, lanes 1 and 3 are %arg + %arg1.
; The insertelement chain starts from a poison placeholder.
define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) {
; CHECK-LABEL: @PR45015(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
; CHECK-NEXT: [[T16:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[T16]]
;
; lane 0: subtract
%t = extractelement <4 x float> %arg, i32 0
%t2 = extractelement <4 x float> %arg1, i32 0
%t3 = fsub float %t, %t2
%t4 = insertelement <4 x float> poison, float %t3, i32 0
; lane 1: add
%t5 = extractelement <4 x float> %arg, i32 1
%t6 = extractelement <4 x float> %arg1, i32 1
%t7 = fadd float %t5, %t6
%t8 = insertelement <4 x float> %t4, float %t7, i32 1
; lane 2: subtract
%t9 = extractelement <4 x float> %arg, i32 2
%t10 = extractelement <4 x float> %arg1, i32 2
%t11 = fsub float %t9, %t10
%t12 = insertelement <4 x float> %t8, float %t11, i32 2
; lane 3: add
%t13 = extractelement <4 x float> %arg, i32 3
%t14 = extractelement <4 x float> %arg1, i32 3
%t15 = fadd float %t13, %t14
%t16 = insertelement <4 x float> %t12, float %t15, i32 3
ret <4 x float> %t16
}
; PR42022 - https://bugs.llvm.org/show_bug.cgi?id=42022
; Four consecutive floats; the pointee type of %r in @add_aggregate_store.
%struct.Vector4 = type { float, float, float, float }
; Computes %a0 + %b0 and %a1 + %b1 one element at a time, rebuilds each sum as
; a <2 x float> through an insertelement chain that starts from poison, and
; returns the two vectors as fields 0 and 1 of an aggregate.
; Note that the insertvalue chain still starts from undef; only the
; insertelement placeholders are poison in this variant of the test.
define { <2 x float>, <2 x float> } @add_aggregate(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1) {
; CHECK-LABEL: @add_aggregate(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[FCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP1]], 0
; CHECK-NEXT: [[FCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[FCA_0_INSERT]], <2 x float> [[TMP2]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[FCA_1_INSERT]]
;
; First result vector: %a0 + %b0, lanes 0 and 1.
%a00 = extractelement <2 x float> %a0, i32 0
%b00 = extractelement <2 x float> %b0, i32 0
%add = fadd float %a00, %b00
%retval.0.0.insert = insertelement <2 x float> poison, float %add, i32 0
%a01 = extractelement <2 x float> %a0, i32 1
%b01 = extractelement <2 x float> %b0, i32 1
%add4 = fadd float %a01, %b01
%retval.0.1.insert = insertelement <2 x float> %retval.0.0.insert, float %add4, i32 1
; Second result vector: %a1 + %b1, lanes 0 and 1.
%a10 = extractelement <2 x float> %a1, i32 0
%b10 = extractelement <2 x float> %b1, i32 0
%add7 = fadd float %a10, %b10
%retval.1.0.insert = insertelement <2 x float> poison, float %add7, i32 0
%a11 = extractelement <2 x float> %a1, i32 1
%b11 = extractelement <2 x float> %b1, i32 1
%add10 = fadd float %a11, %b11
%retval.1.1.insert = insertelement <2 x float> %retval.1.0.insert, float %add10, i32 1
; Pack both vectors into the returned aggregate.
%fca.0.insert = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> %retval.0.1.insert, 0
%fca.1.insert = insertvalue { <2 x float>, <2 x float> } %fca.0.insert, <2 x float> %retval.1.1.insert, 1
ret { <2 x float>, <2 x float> } %fca.1.insert
}
; Computes the four scalar sums a0[0]+b0[0], a0[1]+b0[1], a1[0]+b1[0] and
; a1[1]+b1[1] and stores them to fields 0..3 of the %struct.Vector4 at %r.
; This function contains no insertelement instructions.
define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1, %struct.Vector4* nocapture dereferenceable(16) %r) {
; CHECK-LABEL: @add_aggregate_store(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.Vector4* [[R:%.*]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
; field 0 = a0[0] + b0[0]
%a00 = extractelement <2 x float> %a0, i32 0
%b00 = extractelement <2 x float> %b0, i32 0
%add = fadd float %a00, %b00
%r0 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 0
store float %add, float* %r0, align 4
; field 1 = a0[1] + b0[1]
%a01 = extractelement <2 x float> %a0, i32 1
%b01 = extractelement <2 x float> %b0, i32 1
%add4 = fadd float %a01, %b01
%r1 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 1
store float %add4, float* %r1, align 4
; field 2 = a1[0] + b1[0]
%a10 = extractelement <2 x float> %a1, i32 0
%b10 = extractelement <2 x float> %b1, i32 0
%add7 = fadd float %a10, %b10
%r2 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 2
store float %add7, float* %r2, align 4
; field 3 = a1[1] + b1[1]
%a11 = extractelement <2 x float> %a1, i32 1
%b11 = extractelement <2 x float> %b1, i32 1
%add10 = fadd float %a11, %b11
%r3 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 3
store float %add10, float* %r3, align 4
ret void
}

View File

@ -0,0 +1,153 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -S < %s | FileCheck %s
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s
target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; PR41813 - https://bugs.llvm.org/show_bug.cgi?id=41813
; Reverses %a and %b first, then sums adjacent lane pairs of the reversed
; vectors. In terms of the original lanes the result is
; <a3+a2, a1+a0, b3+b2, b1+b0>.
define <4 x float> @hadd_reverse_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: @hadd_reverse_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
; %shuffle = reversed %a, %shuffle1 = reversed %b.
%shuffle = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%shuffle1 = shufflevector <4 x float> %b, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; result lane 0
%vecext = extractelement <4 x float> %shuffle, i32 0
%vecext2 = extractelement <4 x float> %shuffle, i32 1
%add = fadd float %vecext, %vecext2
%vecinit = insertelement <4 x float> poison, float %add, i32 0
; result lane 1
%vecext3 = extractelement <4 x float> %shuffle, i32 2
%vecext4 = extractelement <4 x float> %shuffle, i32 3
%add5 = fadd float %vecext3, %vecext4
%vecinit6 = insertelement <4 x float> %vecinit, float %add5, i32 1
; result lane 2
%vecext7 = extractelement <4 x float> %shuffle1, i32 0
%vecext8 = extractelement <4 x float> %shuffle1, i32 1
%add9 = fadd float %vecext7, %vecext8
%vecinit10 = insertelement <4 x float> %vecinit6, float %add9, i32 2
; result lane 3
%vecext11 = extractelement <4 x float> %shuffle1, i32 2
%vecext12 = extractelement <4 x float> %shuffle1, i32 3
%add13 = fadd float %vecext11, %vecext12
%vecinit14 = insertelement <4 x float> %vecinit10, float %add13, i32 3
ret <4 x float> %vecinit14
}
; Sums adjacent lane pairs first (<a0+a1, a2+a3, b0+b1, b2+b3>), then reverses
; that vector, giving <b2+b3, b0+b1, a2+a3, a0+a1>.
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: @reverse_hadd_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
; pre-reverse lane 0
%vecext = extractelement <4 x float> %a, i32 0
%vecext1 = extractelement <4 x float> %a, i32 1
%add = fadd float %vecext, %vecext1
%vecinit = insertelement <4 x float> poison, float %add, i32 0
; pre-reverse lane 1
%vecext2 = extractelement <4 x float> %a, i32 2
%vecext3 = extractelement <4 x float> %a, i32 3
%add4 = fadd float %vecext2, %vecext3
%vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
; pre-reverse lane 2
%vecext6 = extractelement <4 x float> %b, i32 0
%vecext7 = extractelement <4 x float> %b, i32 1
%add8 = fadd float %vecext6, %vecext7
%vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
; pre-reverse lane 3
%vecext10 = extractelement <4 x float> %b, i32 2
%vecext11 = extractelement <4 x float> %b, i32 3
%add12 = fadd float %vecext10, %vecext11
%vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
; The mask only uses indices 0..3, so every lane comes from %vecinit13; the
; second operand %a is not selected.
%shuffle = shufflevector <4 x float> %vecinit13, <4 x float> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
; 8-lane variant: reverses %a and %b, then sums adjacent lane pairs of the
; reversed vectors, interleaving a/b results in groups of two lanes. In terms
; of the original lanes the result is
; <a7+a6, a5+a4, b7+b6, b5+b4, a3+a2, a1+a0, b3+b2, b1+b0>.
define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: @hadd_reverse_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 7, i32 5, i32 15, i32 13, i32 3, i32 1, i32 11, i32 9>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 6, i32 4, i32 14, i32 12, i32 2, i32 0, i32 10, i32 8>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
; %shuffle = reversed %a, %shuffle1 = reversed %b.
%shuffle = shufflevector <8 x float> %a, <8 x float> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%shuffle1 = shufflevector <8 x float> %b, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; result lanes 0-1: from reversed %a, elements 0..3
%vecext = extractelement <8 x float> %shuffle, i32 0
%vecext2 = extractelement <8 x float> %shuffle, i32 1
%add = fadd float %vecext, %vecext2
%vecinit = insertelement <8 x float> poison, float %add, i32 0
%vecext3 = extractelement <8 x float> %shuffle, i32 2
%vecext4 = extractelement <8 x float> %shuffle, i32 3
%add5 = fadd float %vecext3, %vecext4
%vecinit6 = insertelement <8 x float> %vecinit, float %add5, i32 1
; result lanes 2-3: from reversed %b, elements 0..3
%vecext7 = extractelement <8 x float> %shuffle1, i32 0
%vecext8 = extractelement <8 x float> %shuffle1, i32 1
%add9 = fadd float %vecext7, %vecext8
%vecinit10 = insertelement <8 x float> %vecinit6, float %add9, i32 2
%vecext11 = extractelement <8 x float> %shuffle1, i32 2
%vecext12 = extractelement <8 x float> %shuffle1, i32 3
%add13 = fadd float %vecext11, %vecext12
%vecinit14 = insertelement <8 x float> %vecinit10, float %add13, i32 3
; result lanes 4-5: from reversed %a, elements 4..7
%vecext15 = extractelement <8 x float> %shuffle, i32 4
%vecext16 = extractelement <8 x float> %shuffle, i32 5
%add17 = fadd float %vecext15, %vecext16
%vecinit18 = insertelement <8 x float> %vecinit14, float %add17, i32 4
%vecext19 = extractelement <8 x float> %shuffle, i32 6
%vecext20 = extractelement <8 x float> %shuffle, i32 7
%add21 = fadd float %vecext19, %vecext20
%vecinit22 = insertelement <8 x float> %vecinit18, float %add21, i32 5
; result lanes 6-7: from reversed %b, elements 4..7
%vecext23 = extractelement <8 x float> %shuffle1, i32 4
%vecext24 = extractelement <8 x float> %shuffle1, i32 5
%add25 = fadd float %vecext23, %vecext24
%vecinit26 = insertelement <8 x float> %vecinit22, float %add25, i32 6
%vecext27 = extractelement <8 x float> %shuffle1, i32 6
%vecext28 = extractelement <8 x float> %shuffle1, i32 7
%add29 = fadd float %vecext27, %vecext28
%vecinit30 = insertelement <8 x float> %vecinit26, float %add29, i32 7
ret <8 x float> %vecinit30
}
; 8-lane variant: sums adjacent lane pairs first, producing
; <a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7>, and then reverses
; that vector with a final shufflevector.
define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: @reverse_hadd_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[SHUFFLE]]
;
; pre-reverse lanes 0-1: %a elements 0..3
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
%add = fadd float %vecext, %vecext1
%vecinit = insertelement <8 x float> poison, float %add, i32 0
%vecext2 = extractelement <8 x float> %a, i32 2
%vecext3 = extractelement <8 x float> %a, i32 3
%add4 = fadd float %vecext2, %vecext3
%vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
; pre-reverse lanes 2-3: %b elements 0..3
%vecext6 = extractelement <8 x float> %b, i32 0
%vecext7 = extractelement <8 x float> %b, i32 1
%add8 = fadd float %vecext6, %vecext7
%vecinit9 = insertelement <8 x float> %vecinit5, float %add8, i32 2
%vecext10 = extractelement <8 x float> %b, i32 2
%vecext11 = extractelement <8 x float> %b, i32 3
%add12 = fadd float %vecext10, %vecext11
%vecinit13 = insertelement <8 x float> %vecinit9, float %add12, i32 3
; pre-reverse lanes 4-5: %a elements 4..7
%vecext14 = extractelement <8 x float> %a, i32 4
%vecext15 = extractelement <8 x float> %a, i32 5
%add16 = fadd float %vecext14, %vecext15
%vecinit17 = insertelement <8 x float> %vecinit13, float %add16, i32 4
%vecext18 = extractelement <8 x float> %a, i32 6
%vecext19 = extractelement <8 x float> %a, i32 7
%add20 = fadd float %vecext18, %vecext19
%vecinit21 = insertelement <8 x float> %vecinit17, float %add20, i32 5
; pre-reverse lanes 6-7: %b elements 4..7
%vecext22 = extractelement <8 x float> %b, i32 4
%vecext23 = extractelement <8 x float> %b, i32 5
%add24 = fadd float %vecext22, %vecext23
%vecinit25 = insertelement <8 x float> %vecinit21, float %add24, i32 6
%vecext26 = extractelement <8 x float> %b, i32 6
%vecext27 = extractelement <8 x float> %b, i32 7
%add28 = fadd float %vecext26, %vecext27
%vecinit29 = insertelement <8 x float> %vecinit25, float %add28, i32 7
; The mask only uses indices 0..7, so every lane comes from %vecinit29; the
; second operand %a is not selected.
%shuffle = shufflevector <8 x float> %vecinit29, <8 x float> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
}
; Attached to all four hadd functions in this file: btver2 target CPU, the
; listed SSE/AVX feature set, and a min-legal-vector-width of 128.
attributes #0 = { "min-legal-vector-width"="128" "target-cpu"="btver2" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3" }

View File

@ -0,0 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -S < %s | FileCheck %s
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s
target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; PR42174 - https://bugs.llvm.org/show_bug.cgi?id=42174
; This test should match the IR produced by clang after running -mem2reg.
; All math before the final 'add' should be scalarized.
; Adds to every lane of %num: the constant 1, eight splatted scalars
; (k/2, p*6234, 75*h, j/3452, 53*w, x/820, 4*u, y), and the constants 25 and
; 317400. Each splat is an insertelement into lane 0 of a poison vector followed
; by a zero-mask shufflevector; the shuffles keep undef as their second operand,
; which the all-zero mask never selects.
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) {
; CHECK-LABEL: @square(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
; CHECK-NEXT: [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]]
; CHECK-NEXT: [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]]
; CHECK-NEXT: [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]]
; CHECK-NEXT: [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]]
; CHECK-NEXT: [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]]
; CHECK-NEXT: [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]]
; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]]
; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[DOTSCALAR8]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
; CHECK-NEXT: ret <4 x i32> [[ADD29]]
;
%add = add <4 x i32> %num, <i32 1, i32 1, i32 1, i32 1>
; + splat(k / 2)
%div = sdiv i32 %k, 2
%splatinsert = insertelement <4 x i32> poison, i32 %div, i32 0
%splat = shufflevector <4 x i32> %splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%add1 = add <4 x i32> %add, %splat
; + splat(p * 6234)
%mul = mul nsw i32 %p, 6234
%splatinsert2 = insertelement <4 x i32> poison, i32 %mul, i32 0
%splat3 = shufflevector <4 x i32> %splatinsert2, <4 x i32> undef, <4 x i32> zeroinitializer
%add4 = add <4 x i32> %add1, %splat3
; + splat(75 * h)
%mul5 = mul nsw i32 75, %h
%splatinsert6 = insertelement <4 x i32> poison, i32 %mul5, i32 0
%splat7 = shufflevector <4 x i32> %splatinsert6, <4 x i32> undef, <4 x i32> zeroinitializer
%add8 = add <4 x i32> %add4, %splat7
; + splat(j / 3452)
%div9 = sdiv i32 %j, 3452
%splatinsert10 = insertelement <4 x i32> poison, i32 %div9, i32 0
%splat11 = shufflevector <4 x i32> %splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
%add12 = add <4 x i32> %add8, %splat11
; + splat(53 * w)
%mul13 = mul nsw i32 53, %w
%splatinsert14 = insertelement <4 x i32> poison, i32 %mul13, i32 0
%splat15 = shufflevector <4 x i32> %splatinsert14, <4 x i32> undef, <4 x i32> zeroinitializer
%add16 = add <4 x i32> %add12, %splat15
; + splat(x / 820)
%div17 = sdiv i32 %x, 820
%splatinsert18 = insertelement <4 x i32> poison, i32 %div17, i32 0
%splat19 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
%add20 = add <4 x i32> %add16, %splat19
; + splat(4 * u)
%mul21 = mul nsw i32 4, %u
%splatinsert22 = insertelement <4 x i32> poison, i32 %mul21, i32 0
%splat23 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer
%add24 = add <4 x i32> %add20, %splat23
; + splat(y)
%splatinsert25 = insertelement <4 x i32> poison, i32 %y, i32 0
%splat26 = shufflevector <4 x i32> %splatinsert25, <4 x i32> undef, <4 x i32> zeroinitializer
%add27 = add <4 x i32> %add24, %splat26
; + constant vectors 25 and 317400
%add28 = add <4 x i32> %add27, <i32 25, i32 25, i32 25, i32 25>
%add29 = add <4 x i32> %add28, <i32 317400, i32 317400, i32 317400, i32 317400>
ret <4 x i32> %add29
}

View File

@ -0,0 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O2 -S -data-layout="e" < %s | FileCheck %s
; RUN: opt -passes='default<O2>' -S -data-layout="e" < %s | FileCheck %s
; Truncates each of the four i32 lanes of %x to i16 individually and
; reassembles them, in order, with an insertelement chain that starts from a
; poison placeholder.
define <4 x i16> @truncate(<4 x i32> %x) {
; CHECK-LABEL: @truncate(
; CHECK-NEXT: [[V3:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[V3]]
;
; lane 0
%x0 = extractelement <4 x i32> %x, i32 0
%t0 = trunc i32 %x0 to i16
%v0 = insertelement <4 x i16> poison, i16 %t0, i32 0
; lane 1
%x1 = extractelement <4 x i32> %x, i32 1
%t1 = trunc i32 %x1 to i16
%v1 = insertelement <4 x i16> %v0, i16 %t1, i32 1
; lane 2
%x2 = extractelement <4 x i32> %x, i32 2
%t2 = trunc i32 %x2 to i16
%v2 = insertelement <4 x i16> %v1, i16 %t2, i32 2
; lane 3
%x3 = extractelement <4 x i32> %x, i32 3
%t3 = trunc i32 %x3 to i16
%v3 = insertelement <4 x i16> %v2, i16 %t3, i32 3
ret <4 x i16> %v3
}

View File

@ -0,0 +1,279 @@
; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S | FileCheck %s
; Extracts one pointer from the incoming vector %vec at the variable index
; %idx, calls @do_safepoint, and returns the extracted pointer, so %obj is
; live across the call.
define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test
; CHECK: extractelement
; CHECK: extractelement
; CHECK: statepoint
; CHECK: gc.relocate
; CHECK-DAG: ; (%base_ee, %obj)
; CHECK: gc.relocate
; CHECK-DAG: ; (%base_ee, %base_ee)
; Note that the second extractelement is actually redundant here. A correct output would
; be to reuse the existing obj as a base since it is actually a base pointer.
entry:
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
; Loads the pointer vector from %ptr on both arms of a branch on %cnd, merges
; the loads with a phi, then branches on %cnd again to extract an element at
; either %idx1 or %idx2. The two extracted pointers are merged by a second phi
; (%obj) that is live across the call to @do_safepoint and then returned.
define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2) gc "statepoint-example" {
; CHECK-LABEL: test2
entry:
br i1 %cnd, label %taken, label %untaken
taken: ; preds = %entry
%obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
untaken: ; preds = %entry
%objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
merge: ; preds = %untaken, %taken
%vec = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
br i1 %cnd, label %taken2, label %untaken2
taken2: ; preds = %merge
%obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
br label %merge2
untaken2: ; preds = %merge
%obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
br label %merge2
merge2: ; preds = %untaken2, %taken2
; The directives below expect a base phi (%obj.base) ahead of the %obj phi
; and relocations of both.
; CHECK-LABEL: merge2:
; CHECK: %obj.base = phi i64 addrspace(1)*
; CHECK: %obj = phi i64 addrspace(1)*
; CHECK: statepoint
; CHECK: gc.relocate
; CHECK-DAG: ; (%obj.base, %obj)
; CHECK: gc.relocate
; CHECK-DAG: ; (%obj.base, %obj.base)
%obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
; Inserts %ptr into lane 0 of a poison vector and immediately extracts lane 0
; again; the extracted pointer is live across the call to @do_safepoint and is
; returned.
define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3
; CHECK: insertelement
; CHECK: extractelement
; CHECK: statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%obj.base, %obj)
entry:
%vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %ptr, i32 0
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
; Builds a two-lane vector holding a derived pointer (%ptr + 16 elements) in
; lane 0 and %ptr itself in lane 1, then extracts lane 0 at a constant index.
; The extracted pointer is live across the call to @do_safepoint and returned.
define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test4
; CHECK: statepoint
; CHECK: gc.relocate
; CHECK-DAG: ; (%obj.base, %obj)
; When we can optimize an extractelement from a known
; index and avoid introducing new base pointer instructions
entry:
%derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
%veca = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %derived, i32 0
%vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
; External consumers carrying the "gc-leaf-function" attribute. @use takes a
; single pointer and is called after the safepoint in several tests in this
; file; @use_vec takes a 4-lane pointer vector.
declare void @use(i64 addrspace(1)*) "gc-leaf-function"
declare void @use_vec(<4 x i64 addrspace(1)*>) "gc-leaf-function"
; Inserts a derived pointer (%obj + 1 element) into lane 0 of a poison vector,
; extracts lane 0 at a constant index, and passes the result to @use after the
; call to @do_safepoint. The %cnd parameter is not used in the body.
define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test5
; CHECK: gc.relocate
; CHECK-DAG: (%bdv.base, %bdv)
; When we fundamentally have to duplicate
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
%vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
; Same shape as @test5 except that the extractelement uses the variable index
; %idx rather than a constant. The directives expect a parallel base vector
; (%vec.base, built from %obj over zeroinitializer) and a matching base extract
; (%bdv.base). The %cnd parameter is not used in the body.
define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test6
; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> zeroinitializer, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%bdv.base, %bdv)
; A more complicated example involving vector and scalar bases.
; This is derived from a failing test case when we didn't have correct
; insertelement handling.
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
%vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
; Two loops in sequence, both controlled by %cnd:
;   - merge1 carries a pointer vector (%vec2) through a phi whose back-edge
;     value (%vec3) is a derived pointer inserted into a poison vector;
;   - merge carries a scalar pointer (%objb) through a phi. Its entry value is
;     %obj and its back-edge value is %bdv, the lane-0 extract of %vec2 taken
;     in next1.
; %objb is live across the call to @do_safepoint in next and is returned.
define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test7
entry:
%vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %obj2, i32 0
br label %merge1
merge1: ; preds = %merge1, %entry
; CHECK-LABEL: merge1:
; CHECK: vec2.base
; CHECK: vec2
; CHECK: gep
; CHECK: vec3.base
; CHECK: vec3
%vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
%gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
%vec3 = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
br i1 %cnd, label %merge1, label %next1
next1: ; preds = %merge1
; CHECK-LABEL: next1:
; CHECK: bdv.base =
; CHECK: bdv =
%bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
br label %merge
merge: ; preds = %merge, %next1
; CHECK-LABEL: merge:
; CHECK: %objb.base
; CHECK: %objb
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%objb.base, %objb)
%objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
br i1 %cnd, label %merge, label %next
next: ; preds = %merge
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %objb
}
; identify base for shufflevector
; Builds two 4-lane poison-based vectors, each holding one pointer derived from
; %obj (%gep in lane 0 of %vec1, %gep2 in lane 2 of %vec2), combines them with
; a shufflevector whose mask <0, 2> takes both result lanes from %vec1, and
; extracts an element at the variable index %idx. The extracted pointer is
; passed to @use after the call to @do_safepoint. The directives expect a
; parallel chain of base values (%vec1.base, %vec2.base, %vec.base, %bdv.base).
define void @test8(i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test8
; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
; CHECK: %gep2 = getelementptr i64, i64 addrspace(1)* %obj, i64 2
; CHECK: %vec1.base = insertelement <4 x i64 addrspace(1)*> zeroinitializer, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
; CHECK: %vec1 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
; CHECK: %vec2.base = insertelement <4 x i64 addrspace(1)*> zeroinitializer, i64 addrspace(1)* %obj, i32 2, !is_base_value !0
; CHECK: %vec2 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep2, i32 2
; CHECK: %vec.base = shufflevector <4 x i64 addrspace(1)*> %vec1.base, <4 x i64 addrspace(1)*> %vec2.base, <2 x i32> <i32 0, i32 2>, !is_base_value !0
; CHECK: %vec = shufflevector <4 x i64 addrspace(1)*> %vec1, <4 x i64 addrspace(1)*> %vec2, <2 x i32> <i32 0, i32 2>
; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%bdv.base, %bdv)
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
%gep2 = getelementptr i64, i64 addrspace(1)* %obj, i64 2
%vec1 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep, i32 0
%vec2 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %gep2, i32 2
%vec = shufflevector <4 x i64 addrspace(1)*> %vec1, <4 x i64 addrspace(1)*> %vec2, <2 x i32> <i32 0, i32 2>
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
; Since the same 'base' vector is used in the shuffle operands, we do not need
; to create a shufflevector base.
; test9: both shuffle operands are the incoming argument %vec1. The expected
; base for %bdv is %base_ee, an extractelement taken directly from %vec1 with
; the same index operand, and the relocated pair is (%base_ee, %bdv).
define void @test9(<4 x i64 addrspace(1)*> %vec1, i64 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test9
; CHECK: %vec = shufflevector <4 x i64 addrspace(1)*> %vec1, <4 x i64 addrspace(1)*> %vec1, <2 x i32> <i32 0, i32 2>
; CHECK: %base_ee = extractelement <4 x i64 addrspace(1)*> %vec1, i64 %idx, !is_base_value !0
; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%base_ee, %bdv)
entry:
; shrinking vec1 into vec
%vec = shufflevector <4 x i64 addrspace(1)*> %vec1, <4 x i64 addrspace(1)*> %vec1, <2 x i32> <i32 0, i32 2>
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
; vector operand of shufflevector is a phi
; test10: %vec is a three-way phi over two insertelement results (%vec1 from
; %entry, %vec2 from %here) and the loop-carried %vec3, which is a lane
; permutation of %vec shuffled with itself. Lane 0 of %vec3 is returned after
; the safepoint in %next. The directives expect a parallel base phi
; (%vec.base), a base shuffle (%vec3.base) and a relocate of (%bdv.base, %bdv).
define i64 addrspace(1)* @test10(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test10
entry:
%vec1 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %obj, i32 0
br i1 %cnd, label %here, label %merge
here:
%vec2 = insertelement <4 x i64 addrspace(1)*> poison, i64 addrspace(1)* %obj2, i32 2
br label %merge
merge: ; preds = %merge, %entry, %here
; CHECK-LABEL: merge:
; CHECK: %vec.base = phi <4 x i64 addrspace(1)*> [ %vec1.base, %entry ], [ %vec2.base, %here ], [ %vec3.base, %merge ], !is_base_value !0
; CHECK: vec
; CHECK: vec3.base = shufflevector <4 x i64 addrspace(1)*> %vec.base, <4 x i64 addrspace(1)*> %vec.base
; CHECK: vec3
; CHECK: bdv.base
; CHECK: bdv
%vec = phi <4 x i64 addrspace(1)*> [ %vec1, %entry ], [ %vec2, %here], [ %vec3, %merge]
%vec3 = shufflevector <4 x i64 addrspace(1)*> %vec, <4 x i64 addrspace(1)*> %vec, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
%bdv = extractelement <4 x i64 addrspace(1)*> %vec3, i32 0
br i1 %cnd, label %merge, label %next
next:
; CHECK-LABEL: next:
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%bdv.base, %bdv)
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %bdv
}
declare void @do_safepoint()
; test11: %vec2 is a vector GEP off the argument %vec1 and is used after the
; safepoint. The directives expect %vec1 to be the value handed to the
; statepoint and relocated, and the GEP to be re-created afterwards from the
; relocated vector as %vec2.remat, which is what @use_vec receives.
define void @test11(<4 x i64 addrspace(1)*> %vec1) gc "statepoint-example" {
; CHECK-LABEL: @test11(
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf{{.*}}<4 x i64 addrspace(1)*> %vec1)
; CHECK: %vec1.relocated = call coldcc <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8
; CHECK: %vec1.relocated.casted = bitcast <4 x i8 addrspace(1)*> %vec1.relocated to <4 x i64 addrspace(1)*>
; CHECK: %vec2.remat = getelementptr i64, <4 x i64 addrspace(1)*> %vec1.relocated.casted, i32 1024
; CHECK: call void @use_vec(<4 x i64 addrspace(1)*> %vec2.remat)
entry:
%vec2 = getelementptr i64, <4 x i64 addrspace(1)*> %vec1, i32 1024
call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use_vec(<4 x i64 addrspace(1) *> %vec2)
ret void
}
declare <4 x i64 addrspace(1)*> @def_vec() "gc-leaf-function"
; test12: the live vector %vec is produced by a call to @def_vec (declared
; with the "gc-leaf-function" attribute) rather than taken from an argument;
; the parameter %vec1 is unused. The directives expect %vec to be passed to the
; statepoint, relocated as a whole vector, cast back, and handed to @use_vec on
; consecutive lines.
define void @test12(<4 x i64 addrspace(1)*> %vec1) gc "statepoint-example" {
; CHECK-LABEL: @test12(
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf{{.*}}<4 x i64 addrspace(1)*> %vec)
; CHECK-NEXT: %vec.relocated = call coldcc <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8(
; CHECK-NEXT: %vec.relocated.casted = bitcast <4 x i8 addrspace(1)*> %vec.relocated to <4 x i64 addrspace(1)*>
; CHECK-NEXT: call void @use_vec(<4 x i64 addrspace(1)*> %vec.relocated.casted)
; CHECK-NEXT: ret void
entry:
%vec = call <4 x i64 addrspace(1)*> @def_vec()
call void @do_safepoint() [ "deopt"() ]
call void @use_vec(<4 x i64 addrspace(1)*> %vec)
ret void
}

View File

@ -0,0 +1,38 @@
; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
declare void @f()
declare void @g(i8 addrspace(1)*, i8 addrspace(1)*)
declare i32 @personality_function()
; Make sure that we do not fail an assertion because we process the call of @g
; before we process the call of @f.
; test_01: %tmp2 and %tmp3 are extracted from a two-element vector whose lanes
; are both %p. @f is called in %backedge, which branches back to %loop; @g in
; %exit consumes %tmp3 and %tmp4, a phi whose incoming values are all %tmp2.
; Only the function label is checked below, so this is effectively a
; "runs to completion" test for the pass named on the RUN line.
define void @test_01(i8 addrspace(1)* %p, i1 %cond) gc "statepoint-example" personality i32 ()* @personality_function {
; CHECK-LABEL: @test_01(
entry:
%tmp0 = insertelement <2 x i8 addrspace(1)*> poison, i8 addrspace(1)* %p, i32 0
%tmp1 = insertelement <2 x i8 addrspace(1)*> %tmp0, i8 addrspace(1)* %p, i32 1
%tmp2 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 1
%tmp3 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 0
br label %loop
loop:
br i1 %cond, label %cond_block, label %exit
cond_block:
br i1 %cond, label %backedge, label %exit
exit:
%tmp4 = phi i8 addrspace(1)* [ %tmp2, %loop ], [ %tmp2, %cond_block ]
call void @g(i8 addrspace(1)* %tmp3, i8 addrspace(1)* %tmp4)
ret void
backedge:
call void @f()
br label %loop
}

View File

@ -0,0 +1,119 @@
; Test that we can correctly handle vectors of pointers in statepoint
; rewriting.
; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S | FileCheck %s
; A non-vector relocation for comparison
; test: a scalar pointer argument is live across the safepoint and returned.
; The expected output is statepoint, relocate, bitcast and ret on consecutive
; lines.
define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: test
; CHECK: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret i64 addrspace(1)*
; A base vector from an argument
entry:
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
; A vector argument
; test2: the live value is a <2 x i64 addrspace(1)*> argument that is returned
; after the safepoint. The function has no explicit entry label, and the
; statepoint is expected on the line immediately after the one matched by the
; label directive.
define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
; CHECK-LABEL: test2
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
call void @do_safepoint() [ "deopt"() ]
ret <2 x i64 addrspace(1)*> %obj
}
; A load
; test3: the live vector is loaded from %ptr before the safepoint and returned
; after it. Expected output: the load, then statepoint, relocate, bitcast and
; ret on consecutive lines.
define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3
; CHECK: load
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
call void @do_safepoint() [ "deopt"() ]
ret <2 x i64 addrspace(1)*> %obj
}
declare i32 @fake_personality_function()
; When a statepoint is an invoke rather than a call
; test4: the safepoint is an invoke, and the loaded vector %obj is returned on
; both the normal and the unwind path. The directives therefore expect a
; relocate/bitcast pair followed by the ret in each of the two successor
; blocks.
define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
; CHECK-LABEL: test4
; CHECK: load
; CHECK-NEXT: gc.statepoint
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
invoke void @do_safepoint() [ "deopt"() ]
to label %normal_return unwind label %exceptional_return
normal_return: ; preds = %entry
; CHECK-LABEL: normal_return:
; CHECK: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
ret <2 x i64 addrspace(1)*> %obj
exceptional_return: ; preds = %entry
; CHECK-LABEL: exceptional_return:
; CHECK: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
%landing_pad4 = landingpad token
cleanup
ret <2 x i64 addrspace(1)*> %obj
}
; A newly created vector
; test5: the live vector is created in this function by inserting %p into lane
; 0 of a poison vector. The directives expect two consecutive insertelement
; instructions before the statepoint (the source has only one; the extra one
; is presumably a materialized base vector - confirm against the pass output)
; and two relocate/bitcast pairs after it, with %vec.relocated.casted returned.
define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-LABEL: test5
; CHECK: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %vec.relocated.casted
entry:
%vec = insertelement <2 x i64 addrspace(1)*> poison, i64 addrspace(1)* %p, i32 0
call void @do_safepoint() [ "deopt"() ]
ret <2 x i64 addrspace(1)*> %vec
}
; A merge point
; test6: the live vector %obj is a phi of two loads from %ptr, one per branch
; arm. The directives expect two phis at the top of %merge (the source has
; one), then the statepoint and two relocate/bitcast pairs before the ret.
define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test6
entry:
br i1 %cnd, label %taken, label %untaken
taken: ; preds = %entry
%obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
untaken: ; preds = %entry
%objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
; CHECK-NEXT: = phi
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
%obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
call void @do_safepoint() [ "deopt"() ]
ret <2 x i64 addrspace(1)*> %obj
}
declare void @do_safepoint()

View File

@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; WARN-NOT: warning
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; Two scalar fabs calls on undef feed a two-step insertelement chain that
; builds a fixed-width <2 x float>. The expected output replaces the scalar
; calls with a single @llvm.fabs.v2f32 call whose lanes are extracted and
; re-inserted into the result vector.
define <2 x float> @insertelement-fixed-vector() {
; CHECK-LABEL: @insertelement-fixed-vector(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x float> [[I0]], float [[TMP3]], i32 1
; CHECK-NEXT: ret <2 x float> [[I1]]
;
%f0 = tail call fast float @llvm.fabs.f32(float undef)
%f1 = tail call fast float @llvm.fabs.f32(float undef)
%i0 = insertelement <2 x float> poison, float %f0, i32 0
%i1 = insertelement <2 x float> %i0, float %f1, i32 1
ret <2 x float> %i1
}
; TODO: llvm.fabs could be optimized in vector form. It's legal to extract
; elements from a fixed-length vector and insert them into a scalable vector.
; Same scalar fabs/insertelement pattern as the fixed-width case, but the
; result type is <vscale x 2 x float>. The expected output is identical to the
; input: both scalar calls and both insertelement instructions remain.
define <vscale x 2 x float> @insertelement-scalable-vector() {
; CHECK-LABEL: @insertelement-scalable-vector(
; CHECK-NEXT: [[F0:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
; CHECK-NEXT: [[F1:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
; CHECK-NEXT: [[I0:%.*]] = insertelement <vscale x 2 x float> poison, float [[F0]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <vscale x 2 x float> [[I0]], float [[F1]], i32 1
; CHECK-NEXT: ret <vscale x 2 x float> [[I1]]
;
%f0 = tail call fast float @llvm.fabs.f32(float undef)
%f1 = tail call fast float @llvm.fabs.f32(float undef)
%i0 = insertelement <vscale x 2 x float> poison, float %f0, i32 0
%i1 = insertelement <vscale x 2 x float> %i0, float %f1, i32 1
ret <vscale x 2 x float> %i1
}
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.fabs.f32(float)

View File

@ -0,0 +1,294 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; Scalarized input: per-lane sums and differences of %v0 and %v1 are formed,
; then result lane 0 = (v0.0+v1.0)+(v0.1+v1.1) and result lane 1 =
; (v0.0-v1.0)+(v0.1-v1.1). The expected output is fully vectorized: lane-swap
; shuffles of the inputs, vector add/sub blended by shufflevector, and one
; final vector add.
define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
; CHECK-LABEL: @build_vec_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[V0:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[V1:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i64> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP5]]
; CHECK-NEXT: ret <2 x i64> [[TMP9]]
;
%v0.0 = extractelement <2 x i64> %v0, i32 0
%v0.1 = extractelement <2 x i64> %v0, i32 1
%v1.0 = extractelement <2 x i64> %v1, i32 0
%v1.1 = extractelement <2 x i64> %v1, i32 1
%tmp0.0 = add i64 %v0.0, %v1.0
%tmp0.1 = add i64 %v0.1, %v1.1
%tmp1.0 = sub i64 %v0.0, %v1.0
%tmp1.1 = sub i64 %v0.1, %v1.1
%tmp2.0 = add i64 %tmp0.0, %tmp0.1
%tmp2.1 = add i64 %tmp1.0, %tmp1.1
%tmp3.0 = insertelement <2 x i64> poison, i64 %tmp2.0, i32 0
%tmp3.1 = insertelement <2 x i64> %tmp3.0, i64 %tmp2.1, i32 1
ret <2 x i64> %tmp3.1
}
; Same add/sub arithmetic as build_vec_v2i64, but the operands are loaded from
; %a and %b and the two results are stored to consecutive slots of %c instead
; of being inserted into a vector. The expected output stays scalar; the only
; visible change is that the zero-index GEPs disappear and the base pointers
; are used directly.
define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @store_chain_v2i64(
; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
; CHECK-NEXT: ret void
;
%a.0 = getelementptr i64, i64* %a, i64 0
%a.1 = getelementptr i64, i64* %a, i64 1
%b.0 = getelementptr i64, i64* %b, i64 0
%b.1 = getelementptr i64, i64* %b, i64 1
%c.0 = getelementptr i64, i64* %c, i64 0
%c.1 = getelementptr i64, i64* %c, i64 1
%v0.0 = load i64, i64* %a.0, align 8
%v0.1 = load i64, i64* %a.1, align 8
%v1.0 = load i64, i64* %b.0, align 8
%v1.1 = load i64, i64* %b.1, align 8
%tmp0.0 = add i64 %v0.0, %v1.0
%tmp0.1 = add i64 %v0.1, %v1.1
%tmp1.0 = sub i64 %v0.0, %v1.0
%tmp1.1 = sub i64 %v0.1, %v1.1
%tmp2.0 = add i64 %tmp0.0, %tmp0.1
%tmp2.1 = add i64 %tmp1.0, %tmp1.1
store i64 %tmp2.0, i64* %c.0, align 8
store i64 %tmp2.1, i64* %c.1, align 8
ret void
}
; Four-lane variant of build_vec_v2i64: even result lanes add adjacent pairs of
; per-lane sums, odd result lanes add adjacent pairs of per-lane differences.
; The expected output is fully vectorized using pair-swapping shuffles
; (<1,0,3,2>) and add/sub blends (<0,5,2,7>).
define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
;
%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2
%v1.3 = extractelement <4 x i32> %v1, i32 3
%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1
%tmp0.2 = add i32 %v0.2, %v1.2
%tmp0.3 = add i32 %v0.3, %v1.3
%tmp1.0 = sub i32 %v0.0, %v1.0
%tmp1.1 = sub i32 %v0.1, %v1.1
%tmp1.2 = sub i32 %v0.2, %v1.2
%tmp1.3 = sub i32 %v0.3, %v1.3
%tmp2.0 = add i32 %tmp0.0, %tmp0.1
%tmp2.1 = add i32 %tmp1.0, %tmp1.1
%tmp2.2 = add i32 %tmp0.2, %tmp0.3
%tmp2.3 = add i32 %tmp1.2, %tmp1.3
%tmp3.0 = insertelement <4 x i32> poison, i32 %tmp2.0, i32 0
%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1
%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2
%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3
ret <4 x i32> %tmp3.3
}
; Two scalars (%tmp2.0, %tmp2.1) are computed from <2 x i32> inputs and each is
; inserted twice into the <4 x i32> result (lanes 0/2 and 1/3). The expected
; output performs the computation on <2 x i32> vectors and then widens the
; result with a single <0,1,0,1> shuffle.
define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
%v1.0 = extractelement <2 x i32> %v1, i32 0
%v1.1 = extractelement <2 x i32> %v1, i32 1
%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1
%tmp1.0 = sub i32 %v0.0, %v1.0
%tmp1.1 = sub i32 %v0.1, %v1.1
%tmp2.0 = add i32 %tmp0.0, %tmp0.1
%tmp2.1 = add i32 %tmp1.0, %tmp1.1
%tmp3.0 = insertelement <4 x i32> poison, i32 %tmp2.0, i32 0
%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1
%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.0, i32 2
%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.1, i32 3
ret <4 x i32> %tmp3.3
}
; Four subtractions over add/xor intermediates feed a <4 x i32> insertelement
; chain; %tmp1.0 and %tmp1.1 are the same expression, and %tmp1.2/%tmp1.3 use
; the xor results in opposite operand order. The expected output mirrors the
; input instruction for instruction, i.e. nothing is vectorized here.
define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32_reuse_1(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i32> [[V0:%.*]], i32 0
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> [[V0]], i32 1
; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i32> [[V1:%.*]], i32 0
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> [[V1]], i32 1
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP0_2:%.*]] = xor i32 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP0_3:%.*]] = xor i32 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP1_2:%.*]] = sub i32 [[TMP0_2]], [[TMP0_3]]
; CHECK-NEXT: [[TMP1_3:%.*]] = sub i32 [[TMP0_3]], [[TMP0_2]]
; CHECK-NEXT: [[TMP2_0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1_0]], i32 0
; CHECK-NEXT: [[TMP2_1:%.*]] = insertelement <4 x i32> [[TMP2_0]], i32 [[TMP1_1]], i32 1
; CHECK-NEXT: [[TMP2_2:%.*]] = insertelement <4 x i32> [[TMP2_1]], i32 [[TMP1_2]], i32 2
; CHECK-NEXT: [[TMP2_3:%.*]] = insertelement <4 x i32> [[TMP2_2]], i32 [[TMP1_3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP2_3]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
%v1.0 = extractelement <2 x i32> %v1, i32 0
%v1.1 = extractelement <2 x i32> %v1, i32 1
%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1
%tmp0.2 = xor i32 %v0.0, %v1.0
%tmp0.3 = xor i32 %v0.1, %v1.1
%tmp1.0 = sub i32 %tmp0.0, %tmp0.1
%tmp1.1 = sub i32 %tmp0.0, %tmp0.1
%tmp1.2 = sub i32 %tmp0.2, %tmp0.3
%tmp1.3 = sub i32 %tmp0.3, %tmp0.2
%tmp2.0 = insertelement <4 x i32> poison, i32 %tmp1.0, i32 0
%tmp2.1 = insertelement <4 x i32> %tmp2.0, i32 %tmp1.1, i32 1
%tmp2.2 = insertelement <4 x i32> %tmp2.1, i32 %tmp1.2, i32 2
%tmp2.3 = insertelement <4 x i32> %tmp2.2, i32 %tmp1.3, i32 3
ret <4 x i32> %tmp2.3
}
; The four result lanes are sums over three different first-level opcodes:
; lane 0 sums the adds, lane 1 sums the muls, lanes 2 and 3 each sum the two
; xors. The expected output is only partly vectorized: the add and mul lanes
; stay scalar and are inserted individually, while the xor-based lanes are
; computed as <2 x i32> and merged in with a final <0,1,4,5> shuffle.
define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32_3_binops(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i32> [[V0:%.*]], i32 0
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> [[V0]], i32 1
; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i32> [[V1:%.*]], i32 0
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> [[V1]], i32 1
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP1_0:%.*]] = mul i32 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP1_1:%.*]] = mul i32 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <4 x i32> [[TMP3_0]], i32 [[TMP2_1]], i32 1
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <4 x i32> [[TMP3_1]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
%v1.0 = extractelement <2 x i32> %v1, i32 0
%v1.1 = extractelement <2 x i32> %v1, i32 1
%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1
%tmp0.2 = xor i32 %v0.0, %v1.0
%tmp0.3 = xor i32 %v0.1, %v1.1
%tmp1.0 = mul i32 %v0.0, %v1.0
%tmp1.1 = mul i32 %v0.1, %v1.1
%tmp1.2 = xor i32 %v0.0, %v1.0
%tmp1.3 = xor i32 %v0.1, %v1.1
%tmp2.0 = add i32 %tmp0.0, %tmp0.1
%tmp2.1 = add i32 %tmp1.0, %tmp1.1
%tmp2.2 = add i32 %tmp0.2, %tmp0.3
%tmp2.3 = add i32 %tmp1.2, %tmp1.3
%tmp3.0 = insertelement <4 x i32> poison, i32 %tmp2.0, i32 0
%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1
%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2
%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3
ret <4 x i32> %tmp3.3
}
; Starts with the same add/sub pairing as build_vec_v4i32, then applies an
; identical lshr / and / mul nuw / add / xor sequence to each of the four
; scalars and sums the results into one i32. The expected output vectorizes the
; whole chain and ends with a single @llvm.vector.reduce.add.v4i32 call.
define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i32> [[TMP10]], <i32 65537, i32 65537, i32 65537, i32 65537>
; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <4 x i32> [[TMP11]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i32> [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
; CHECK-NEXT: ret i32 [[TMP15]]
;
%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2
%v1.3 = extractelement <4 x i32> %v1, i32 3
%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1
%tmp0.2 = add i32 %v0.2, %v1.2
%tmp0.3 = add i32 %v0.3, %v1.3
%tmp1.0 = sub i32 %v0.0, %v1.0
%tmp1.1 = sub i32 %v0.1, %v1.1
%tmp1.2 = sub i32 %v0.2, %v1.2
%tmp1.3 = sub i32 %v0.3, %v1.3
%tmp2.0 = add i32 %tmp0.0, %tmp0.1
%tmp2.1 = add i32 %tmp1.0, %tmp1.1
%tmp2.2 = add i32 %tmp0.2, %tmp0.3
%tmp2.3 = add i32 %tmp1.2, %tmp1.3
%tmp3.0 = lshr i32 %tmp2.0, 15
%tmp3.1 = lshr i32 %tmp2.1, 15
%tmp3.2 = lshr i32 %tmp2.2, 15
%tmp3.3 = lshr i32 %tmp2.3, 15
%tmp4.0 = and i32 %tmp3.0, 65537
%tmp4.1 = and i32 %tmp3.1, 65537
%tmp4.2 = and i32 %tmp3.2, 65537
%tmp4.3 = and i32 %tmp3.3, 65537
%tmp5.0 = mul nuw i32 %tmp4.0, 65535
%tmp5.1 = mul nuw i32 %tmp4.1, 65535
%tmp5.2 = mul nuw i32 %tmp4.2, 65535
%tmp5.3 = mul nuw i32 %tmp4.3, 65535
%tmp6.0 = add i32 %tmp5.0, %tmp2.0
%tmp6.1 = add i32 %tmp5.1, %tmp2.1
%tmp6.2 = add i32 %tmp5.2, %tmp2.2
%tmp6.3 = add i32 %tmp5.3, %tmp2.3
%tmp7.0 = xor i32 %tmp6.0, %tmp5.0
%tmp7.1 = xor i32 %tmp6.1, %tmp5.1
%tmp7.2 = xor i32 %tmp6.2, %tmp5.2
%tmp7.3 = xor i32 %tmp6.3, %tmp5.3
%reduce.0 = add i32 %tmp7.1, %tmp7.0
%reduce.1 = add i32 %reduce.0, %tmp7.2
%reduce.2 = add i32 %reduce.1, %tmp7.3
ret i32 %reduce.2
}

View File

@ -0,0 +1,336 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; Applies the scalar llvm.uadd.sat.i16 intrinsic to each lane of a <2 x i16>
; pair and rebuilds the result with an insertelement chain based on poison.
; The GFX7 assertions keep both scalar calls; the GFX8 assertions expect a
; single llvm.uadd.sat.v2i16 call on the original arguments.
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @uadd_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
; Applies the scalar llvm.usub.sat.i16 intrinsic to each lane of a <2 x i16>
; pair and rebuilds the result with an insertelement chain based on poison.
; The GFX7 assertions keep both scalar calls; the GFX8 assertions expect a
; single llvm.usub.sat.v2i16 call on the original arguments.
; (The %add.N value names hold the saturating-subtract results here.)
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @usub_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @usub_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
; Applies the scalar llvm.sadd.sat.i16 intrinsic to each lane of a <2 x i16>
; pair and rebuilds the result with an insertelement chain based on poison.
; The GFX7 assertions keep both scalar calls; the GFX8 assertions expect a
; single llvm.sadd.sat.v2i16 call on the original arguments.
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @sadd_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @sadd_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
; Applies the scalar llvm.ssub.sat.i16 intrinsic to each lane of a <2 x i16>
; pair and rebuilds the result with an insertelement chain based on poison.
; The GFX7 assertions keep both scalar calls; the GFX8 assertions expect a
; single llvm.ssub.sat.v2i16 call on the original arguments.
; (The %add.N value names hold the saturating-subtract results here.)
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @ssub_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @ssub_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
; 32-bit counterpart of the v2i16 case: per-lane llvm.uadd.sat.i32 on a
; <2 x i32> pair, rebuilt via insertelement on poison. All run configurations
; share the GCN assertions, which expect the two scalar calls to be left as-is.
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @uadd_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
; Per-lane llvm.usub.sat.i32 on a <2 x i32> pair, rebuilt via insertelement on
; poison. All run configurations share the GCN assertions, which expect the
; two scalar calls to be left as-is.
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @usub_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
; Per-lane llvm.sadd.sat.i32 on a <2 x i32> pair, rebuilt via insertelement on
; poison. All run configurations share the GCN assertions, which expect the
; two scalar calls to be left as-is.
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @sadd_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
; Per-lane llvm.ssub.sat.i32 on a <2 x i32> pair, rebuilt via insertelement on
; poison. All run configurations share the GCN assertions, which expect the
; two scalar calls to be left as-is.
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @ssub_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
; Odd-width case: per-lane llvm.uadd.sat.i16 over three i16 lanes, rebuilt via
; insertelement on poison. The GFX7 assertions keep all three scalar calls.
; The GFX8 assertions expect lanes 0 and 1 to go through one
; llvm.uadd.sat.v2i16 call (inputs formed by shufflevector) while lane 2 keeps
; its scalar call; the results are then re-inserted lane by lane.
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v3i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT: ret <3 x i16> [[INS_2]]
;
; GFX8-LABEL: @uadd_sat_v3i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
; GFX8-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[TMP3]], i64 0
; GFX8-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; GFX8-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[TMP4]], i64 1
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
;
bb:
%arg0.0 = extractelement <3 x i16> %arg0, i64 0
%arg0.1 = extractelement <3 x i16> %arg0, i64 1
%arg0.2 = extractelement <3 x i16> %arg0, i64 2
%arg1.0 = extractelement <3 x i16> %arg1, i64 0
%arg1.1 = extractelement <3 x i16> %arg1, i64 1
%arg1.2 = extractelement <3 x i16> %arg1, i64 2
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
%ins.0 = insertelement <3 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
%ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
ret <3 x i16> %ins.2
}
; Four-lane case: per-lane llvm.uadd.sat.i16 over a <4 x i16> pair, rebuilt via
; insertelement on poison. The GFX7 assertions keep all four scalar calls.
; The GFX8 assertions expect two llvm.uadd.sat.v2i16 calls, one for lanes
; {0,1} and one for lanes {2,3}, whose results are concatenated by a final
; shufflevector.
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v4i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
; GFX7-NEXT: ret <4 x i16> [[INS_3]]
;
; GFX8-LABEL: @uadd_sat_v4i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT: [[INS_3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
;
bb:
%arg0.0 = extractelement <4 x i16> %arg0, i64 0
%arg0.1 = extractelement <4 x i16> %arg0, i64 1
%arg0.2 = extractelement <4 x i16> %arg0, i64 2
%arg0.3 = extractelement <4 x i16> %arg0, i64 3
%arg1.0 = extractelement <4 x i16> %arg1, i64 0
%arg1.1 = extractelement <4 x i16> %arg1, i64 1
%arg1.2 = extractelement <4 x i16> %arg1, i64 2
%arg1.3 = extractelement <4 x i16> %arg1, i64 3
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
%add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
%ins.0 = insertelement <4 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
%ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
%ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
ret <4 x i16> %ins.3
}
; Scalar saturating add/sub intrinsic declarations (unsigned and signed, i16
; and i32) used by the functions in this file. All of them reference attribute
; group #0, defined on the last line.
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare i16 @llvm.usub.sat.i16(i16, i16) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare i16 @llvm.ssub.sat.i16(i16, i16) #0
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare i32 @llvm.usub.sat.i32(i32, i32) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare i32 @llvm.ssub.sat.i32(i32, i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }

View File

@ -0,0 +1,38 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; Byte-swaps each i16 lane of %arg with the scalar llvm.bswap.i16 intrinsic and
; rebuilds the <2 x i16> result via insertelement on poison. The assertions
; below expect two scalar calls under the GFX7 prefix and one
; llvm.bswap.v2i16 call under the GFX8 prefix.
; GCN-LABEL: @bswap_v2i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX8: call <2 x i16> @llvm.bswap.v2i16(
define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
bb:
%tmp = extractelement <2 x i16> %arg, i64 0
%tmp1 = tail call i16 @llvm.bswap.i16(i16 %tmp)
%tmp2 = insertelement <2 x i16> poison, i16 %tmp1, i64 0
%tmp3 = extractelement <2 x i16> %arg, i64 1
%tmp4 = tail call i16 @llvm.bswap.i16(i16 %tmp3)
%tmp5 = insertelement <2 x i16> %tmp2, i16 %tmp4, i64 1
ret <2 x i16> %tmp5
}
; Byte-swaps each i32 lane of %arg with the scalar llvm.bswap.i32 intrinsic and
; rebuilds the <2 x i32> result via insertelement on poison. The shared GCN
; assertions below expect both scalar calls to remain in every configuration.
; GCN-LABEL: @bswap_v2i32(
; GCN: call i32 @llvm.bswap.i32
; GCN: call i32 @llvm.bswap.i32
define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
bb:
%tmp = extractelement <2 x i32> %arg, i64 0
%tmp1 = tail call i32 @llvm.bswap.i32(i32 %tmp)
%tmp2 = insertelement <2 x i32> poison, i32 %tmp1, i64 0
%tmp3 = extractelement <2 x i32> %arg, i64 1
%tmp4 = tail call i32 @llvm.bswap.i32(i32 %tmp3)
%tmp5 = insertelement <2 x i32> %tmp2, i32 %tmp4, i64 1
ret <2 x i32> %tmp5
}
; Scalar bswap intrinsic declarations used by the two functions in this file;
; both reference attribute group #0, defined on the last line.
declare i16 @llvm.bswap.i16(i16) #0
declare i32 @llvm.bswap.i32(i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }

View File

@ -0,0 +1,38 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; Rounds each half lane of %arg with the scalar round intrinsic and rebuilds
; the <2 x half> result via insertelement on poison. The assertions below
; expect two scalar llvm.round.f16 calls under the GFX7 prefix and one
; llvm.round.v2f16 call under the GFX8 prefix.
; The scalar intrinsic is spelled with its canonical overload suffix (f16) so
; the input matches the names the assertions look for, instead of relying on
; the IR reader to rename a non-canonical "llvm.round.half" spelling.
; GCN-LABEL: @round_v2f16(
; GFX7: call half @llvm.round.f16(
; GFX7: call half @llvm.round.f16(
; GFX8: call <2 x half> @llvm.round.v2f16(
define <2 x half> @round_v2f16(<2 x half> %arg) {
bb:
%tmp = extractelement <2 x half> %arg, i64 0
%tmp1 = tail call half @llvm.round.f16(half %tmp)
%tmp2 = insertelement <2 x half> poison, half %tmp1, i64 0
%tmp3 = extractelement <2 x half> %arg, i64 1
%tmp4 = tail call half @llvm.round.f16(half %tmp3)
%tmp5 = insertelement <2 x half> %tmp2, half %tmp4, i64 1
ret <2 x half> %tmp5
}
; Same pattern on float lanes. The shared GCN assertions expect both scalar
; llvm.round.f32 calls to remain in every configuration.
; GCN-LABEL: @round_v2f32(
; GCN: call float @llvm.round.f32(
; GCN: call float @llvm.round.f32(
define <2 x float> @round_v2f32(<2 x float> %arg) {
bb:
%tmp = extractelement <2 x float> %arg, i64 0
%tmp1 = tail call float @llvm.round.f32(float %tmp)
%tmp2 = insertelement <2 x float> poison, float %tmp1, i64 0
%tmp3 = extractelement <2 x float> %arg, i64 1
%tmp4 = tail call float @llvm.round.f32(float %tmp3)
%tmp5 = insertelement <2 x float> %tmp2, float %tmp4, i64 1
ret <2 x float> %tmp5
}
; Scalar round intrinsic declarations; both reference attribute group #0.
declare half @llvm.round.f16(half) #0
declare float @llvm.round.f32(float) #0
attributes #0 = { nounwind readnone speculatable willreturn }

View File

@ -0,0 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -mtriple=thumb7 -mcpu=swift | FileCheck %s
; Converts the four float lanes of %in to i32 one at a time with scalar fptosi
; and rebuilds a <4 x i32> via an insertelement chain based on poison. The
; assertions expect a single <4 x float> to <4 x i32> fptosi, whose lanes are
; then extracted and re-inserted into the result vector.
define <4 x i32> @PR13837(<4 x float> %in) {
; CHECK-LABEL: @PR13837(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x i32> [[V3]]
;
%t0 = extractelement <4 x float> %in, i64 0
%t1 = extractelement <4 x float> %in, i64 1
%t2 = extractelement <4 x float> %in, i64 2
%t3 = extractelement <4 x float> %in, i64 3
%c0 = fptosi float %t0 to i32
%c1 = fptosi float %t1 to i32
%c2 = fptosi float %t2 to i32
%c3 = fptosi float %t3 to i32
%v0 = insertelement <4 x i32> poison, i32 %c0, i32 0
%v1 = insertelement <4 x i32> %v0, i32 %c1, i32 1
%v2 = insertelement <4 x i32> %v1, i32 %c2, i32 2
%v3 = insertelement <4 x i32> %v2, i32 %c3, i32 3
ret <4 x i32> %v3
}

View File

@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000 | FileCheck %s
target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx--nvidiacl"
; CTLZ cannot be vectorized currently because the second argument is a scalar
; for both the scalar and vector forms of the intrinsic. In the future it
; should be possible to vectorize such functions.
; Test causes an assert if LLVM tries to vectorize CTLZ.
; Interleaved form: for each lane, extract, call llvm.ctlz.i8 (second operand
; i1 false), then insert into a vector based on poison. Uses attribute group
; #0. The assertions expect the output to keep both scalar ctlz calls in the
; same order as the input.
define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
; CHECK-LABEL: @cltz_test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i8> poison, i8 [[CALL_I]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
; CHECK-NEXT: [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
; CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
; CHECK-NEXT: ret <2 x i8> [[VECINIT2]]
;
entry:
%0 = extractelement <2 x i8> %x, i32 0
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%vecinit = insertelement <2 x i8> poison, i8 %call.i, i32 0
%1 = extractelement <2 x i8> %x, i32 1
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}
; Grouped form of the same computation: both extracts first, then both
; llvm.ctlz.i8 calls, then both inserts into a vector based on poison. Uses
; attribute group #1. The assertions expect both scalar ctlz calls to remain,
; in the same order as the input.
define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
; CHECK-LABEL: @cltz_test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
; CHECK-NEXT: [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
; CHECK-NEXT: [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i8> poison, i8 [[CALL_I]], i32 0
; CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
; CHECK-NEXT: ret <2 x i8> [[VECINIT2]]
;
entry:
%0 = extractelement <2 x i8> %x, i32 0
%1 = extractelement <2 x i8> %x, i32 1
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
%vecinit = insertelement <2 x i8> poison, i8 %call.i, i32 0
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}
; Declaration of the scalar ctlz intrinsic called by both test functions.
; It references attribute group #1 (nounwind readnone), which is defined
; below; the previous reference to #3 named a group this file never defines.
declare i8 @llvm.ctlz.i8(i8, i1) #1
attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

View File

@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s
; Takes lanes 4 and 5 of an undef <16 x half>, extends each to float, bitcasts
; it to i32 and inserts it into lanes 4 and 5 of an <8 x i32> based on poison.
; The final vector is unused (the function returns void). The assertions
; expect the fpext and bitcast to be done on two-element vectors, with the
; lanes extracted afterwards for the insertelement chain.
define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> undef, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> poison, i32 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP7]], i32 5
; CHECK-NEXT: ret void
;
entry:
%0 = extractelement <16 x half> undef, i32 4
%conv.i.4.i = fpext half %0 to float
%1 = bitcast float %conv.i.4.i to i32
%vecins.i.4.i = insertelement <8 x i32> poison, i32 %1, i32 4
%2 = extractelement <16 x half> undef, i32 5
%conv.i.5.i = fpext half %2 to float
%3 = bitcast float %conv.i.5.i to i32
%vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5
ret void
}

View File

@ -0,0 +1,65 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s
; Applies a per-lane mix of llvm.ceil.f32 and llvm.floor.f32 to the eight
; lanes of %a (lanes 0,3,4,5 use ceil; lanes 1,2,6,7 use floor) and rebuilds
; the result via an insertelement chain based on poison. Every run
; configuration shares the same assertions, which expect all eight scalar
; calls and the insert chain to remain as written.
define <8 x float> @ceil_floor(<8 x float> %a) {
; CHECK-LABEL: @ceil_floor(
; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; CHECK-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
; CHECK-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]])
; CHECK-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]])
; CHECK-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
; CHECK-NEXT: [[AB4:%.*]] = call float @llvm.ceil.f32(float [[A4]])
; CHECK-NEXT: [[AB5:%.*]] = call float @llvm.ceil.f32(float [[A5]])
; CHECK-NEXT: [[AB6:%.*]] = call float @llvm.floor.f32(float [[A6]])
; CHECK-NEXT: [[AB7:%.*]] = call float @llvm.floor.f32(float [[A7]])
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; CHECK-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%ab0 = call float @llvm.ceil.f32(float %a0)
%ab1 = call float @llvm.floor.f32(float %a1)
%ab2 = call float @llvm.floor.f32(float %a2)
%ab3 = call float @llvm.ceil.f32(float %a3)
%ab4 = call float @llvm.ceil.f32(float %a4)
%ab5 = call float @llvm.ceil.f32(float %a5)
%ab6 = call float @llvm.floor.f32(float %a6)
%ab7 = call float @llvm.floor.f32(float %a7)
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}
; Scalar ceil/floor intrinsic declarations used by @ceil_floor; no attribute
; group is attached to either.
declare float @llvm.ceil.f32(float)
declare float @llvm.floor.f32(float)

View File

@ -0,0 +1,466 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; Alternating-opcode cast test: lanes 0-3 of %a are converted with sitofp and
; lanes 4-7 with uitofp, then the eight scalars are rebuilt into an
; <8 x float> by an insertelement chain that starts from poison.
; Per the assertions below, the run without -mcpu (SSE prefix) keeps the
; scalar code, while the slm, AVX and AVX512 runs emit one vector sitofp, one
; vector uitofp and a shufflevector that takes lanes 0-3 from the first result
; and lanes 4-7 from the second.
define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
; SSE-LABEL: @sitofp_uitofp(
; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; SSE-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float
; SSE-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float
; SSE-NEXT: [[AB2:%.*]] = sitofp i32 [[A2]] to float
; SSE-NEXT: [[AB3:%.*]] = sitofp i32 [[A3]] to float
; SSE-NEXT: [[AB4:%.*]] = uitofp i32 [[A4]] to float
; SSE-NEXT: [[AB5:%.*]] = uitofp i32 [[A5]] to float
; SSE-NEXT: [[AB6:%.*]] = uitofp i32 [[A6]] to float
; SSE-NEXT: [[AB7:%.*]] = uitofp i32 [[A7]] to float
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SSE-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @sitofp_uitofp(
; SLM-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; SLM-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @sitofp_uitofp(
; AVX-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @sitofp_uitofp(
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%ab0 = sitofp i32 %a0 to float
%ab1 = sitofp i32 %a1 to float
%ab2 = sitofp i32 %a2 to float
%ab3 = sitofp i32 %a3 to float
%ab4 = uitofp i32 %a4 to float
%ab5 = uitofp i32 %a5 to float
%ab6 = uitofp i32 %a6 to float
%ab7 = uitofp i32 %a7 to float
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}
; Alternating-opcode cast test in the float-to-int direction: lanes 0-3 of %a
; go through fptosi, lanes 4-7 through fptoui, and the results are inserted
; into an <8 x i32> that starts from poison.
; Per the assertions below, only the AVX512 runs (knl, skx) are expected to
; produce the vector fptosi + vector fptoui + shufflevector form; the SSE, SLM
; and AVX runs are expected to keep the scalar code unchanged.
define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
; SSE-LABEL: @fptosi_fptoui(
; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; SSE-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; SSE-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; SSE-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; SSE-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; SSE-NEXT: [[AB0:%.*]] = fptosi float [[A0]] to i32
; SSE-NEXT: [[AB1:%.*]] = fptosi float [[A1]] to i32
; SSE-NEXT: [[AB2:%.*]] = fptosi float [[A2]] to i32
; SSE-NEXT: [[AB3:%.*]] = fptosi float [[A3]] to i32
; SSE-NEXT: [[AB4:%.*]] = fptoui float [[A4]] to i32
; SSE-NEXT: [[AB5:%.*]] = fptoui float [[A5]] to i32
; SSE-NEXT: [[AB6:%.*]] = fptoui float [[A6]] to i32
; SSE-NEXT: [[AB7:%.*]] = fptoui float [[A7]] to i32
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @fptosi_fptoui(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; SLM-NEXT: [[AB0:%.*]] = fptosi float [[A0]] to i32
; SLM-NEXT: [[AB1:%.*]] = fptosi float [[A1]] to i32
; SLM-NEXT: [[AB2:%.*]] = fptosi float [[A2]] to i32
; SLM-NEXT: [[AB3:%.*]] = fptosi float [[A3]] to i32
; SLM-NEXT: [[AB4:%.*]] = fptoui float [[A4]] to i32
; SLM-NEXT: [[AB5:%.*]] = fptoui float [[A5]] to i32
; SLM-NEXT: [[AB6:%.*]] = fptoui float [[A6]] to i32
; SLM-NEXT: [[AB7:%.*]] = fptoui float [[A7]] to i32
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX-LABEL: @fptosi_fptoui(
; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; AVX-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; AVX-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; AVX-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; AVX-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; AVX-NEXT: [[AB0:%.*]] = fptosi float [[A0]] to i32
; AVX-NEXT: [[AB1:%.*]] = fptosi float [[A1]] to i32
; AVX-NEXT: [[AB2:%.*]] = fptosi float [[A2]] to i32
; AVX-NEXT: [[AB3:%.*]] = fptosi float [[A3]] to i32
; AVX-NEXT: [[AB4:%.*]] = fptoui float [[A4]] to i32
; AVX-NEXT: [[AB5:%.*]] = fptoui float [[A5]] to i32
; AVX-NEXT: [[AB6:%.*]] = fptoui float [[A6]] to i32
; AVX-NEXT: [[AB7:%.*]] = fptoui float [[A7]] to i32
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; AVX-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; AVX-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; AVX-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; AVX-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; AVX-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; AVX-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; AVX-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @fptosi_fptoui(
; AVX512-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%ab0 = fptosi float %a0 to i32
%ab1 = fptosi float %a1 to i32
%ab2 = fptosi float %a2 to i32
%ab3 = fptosi float %a3 to i32
%ab4 = fptoui float %a4 to i32
%ab5 = fptoui float %a5 to i32
%ab6 = fptoui float %a6 to i32
%ab7 = fptoui float %a7 to i32
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Integer-domain fneg/fabs test: every lane of %a is bitcast to i32, lanes 0-3
; are xor'ed with the sign-bit mask and lanes 4-7 are and'ed with the
; complement of that mask, then each lane is bitcast back to float and
; inserted into an <8 x float> that starts from poison.
; The shared assertions below expect every run to produce one bitcast to
; <8 x i32>, a vector xor and a vector and (with undef in the constant lanes
; that are not selected), a shufflevector taking lanes 0-3 from the xor and
; lanes 4-7 from the and, and a final bitcast back to <8 x float>.
define <8 x float> @fneg_fabs(<8 x float> %a) {
; CHECK-LABEL: @fneg_fabs(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 undef, i32 undef, i32 undef, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP4]] to <8 x float>
; CHECK-NEXT: ret <8 x float> [[TMP5]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%aa0 = bitcast float %a0 to i32
%aa1 = bitcast float %a1 to i32
%aa2 = bitcast float %a2 to i32
%aa3 = bitcast float %a3 to i32
%aa4 = bitcast float %a4 to i32
%aa5 = bitcast float %a5 to i32
%aa6 = bitcast float %a6 to i32
%aa7 = bitcast float %a7 to i32
; Lanes 0-3: -2147483648 is 0x80000000 as an i32, so the xor flips only the
; sign bit of the float bit pattern (the "fneg" half of the test).
%ab0 = xor i32 %aa0, -2147483648
%ab1 = xor i32 %aa1, -2147483648
%ab2 = xor i32 %aa2, -2147483648
%ab3 = xor i32 %aa3, -2147483648
; Lanes 4-7: 2147483647 is 0x7FFFFFFF, so the and clears only the sign bit
; (the "fabs" half of the test).
%ab4 = and i32 %aa4, 2147483647
%ab5 = and i32 %aa5, 2147483647
%ab6 = and i32 %aa6, 2147483647
%ab7 = and i32 %aa7, 2147483647
%ac0 = bitcast i32 %ab0 to float
%ac1 = bitcast i32 %ab1 to float
%ac2 = bitcast i32 %ab2 to float
%ac3 = bitcast i32 %ab3 to float
%ac4 = bitcast i32 %ab4 to float
%ac5 = bitcast i32 %ab5 to float
%ac6 = bitcast i32 %ab6 to float
%ac7 = bitcast i32 %ab7 to float
%r0 = insertelement <8 x float> poison, float %ac0, i32 0
%r1 = insertelement <8 x float> %r0, float %ac1, i32 1
%r2 = insertelement <8 x float> %r1, float %ac2, i32 2
%r3 = insertelement <8 x float> %r2, float %ac3, i32 3
%r4 = insertelement <8 x float> %r3, float %ac4, i32 4
%r5 = insertelement <8 x float> %r4, float %ac5, i32 5
%r6 = insertelement <8 x float> %r5, float %ac6, i32 6
%r7 = insertelement <8 x float> %r6, float %ac7, i32 7
ret <8 x float> %r7
}
; Alternating-opcode extension test: lanes 0-3 of the <8 x i16> input are
; sign-extended to i32 and lanes 4-7 are zero-extended, then inserted into an
; <8 x i32> that starts from poison.
; The shared assertions below expect every run to produce a vector sext, a
; vector zext and a shufflevector taking lanes 0-3 from the sext result and
; lanes 4-7 from the zext result.
define <8 x i32> @sext_zext(<8 x i16> %a) {
; CHECK-LABEL: @sext_zext(
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1
%a2 = extractelement <8 x i16> %a, i32 2
%a3 = extractelement <8 x i16> %a, i32 3
%a4 = extractelement <8 x i16> %a, i32 4
%a5 = extractelement <8 x i16> %a, i32 5
%a6 = extractelement <8 x i16> %a, i32 6
%a7 = extractelement <8 x i16> %a, i32 7
%ab0 = sext i16 %a0 to i32
%ab1 = sext i16 %a1 to i32
%ab2 = sext i16 %a2 to i32
%ab3 = sext i16 %a3 to i32
%ab4 = zext i16 %a4 to i32
%ab5 = zext i16 %a5 to i32
%ab6 = zext i16 %a6 to i32
%ab7 = zext i16 %a7 to i32
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Same opcode (sitofp) on mixed source types: result lanes 0-3 come from the
; four lanes of the <4 x i32> argument %a, result lanes 4-7 come from lanes
; 0-3 of the <8 x i16> argument %b. The eight floats are inserted into an
; <8 x float> that starts from poison.
; The shared assertions below expect every run to vectorize only the i32 half
; into a single <4 x i32> sitofp whose lanes are extracted again for the
; insertelement chain; the four i16 conversions stay scalar.
define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: @sitofp_4i32_8i16(
; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; CHECK-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float
; CHECK-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float
; CHECK-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3
; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; CHECK-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <8 x i16> %b, i32 0
%b1 = extractelement <8 x i16> %b, i32 1
%b2 = extractelement <8 x i16> %b, i32 2
%b3 = extractelement <8 x i16> %b, i32 3
%ab0 = sitofp i32 %a0 to float
%ab1 = sitofp i32 %a1 to float
%ab2 = sitofp i32 %a2 to float
%ab3 = sitofp i32 %a3 to float
%ab4 = sitofp i16 %b0 to float
%ab5 = sitofp i16 %b1 to float
%ab6 = sitofp i16 %b2 to float
%ab7 = sitofp i16 %b3 to float
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}
; Inspired by PR38154
; Mixed opcodes and mixed source types feeding one <8 x float>:
;   lanes 0-1: sitofp of %a lanes 0-1 (i32)   lanes 2-3: uitofp of %a lanes 2-3 (i32)
;   lane  4  : sitofp of %b lane 0 (i16)      lane  5  : uitofp of %b lane 1 (i16)
;   lane  6  : sitofp of %c lane 0 (i8)       lane  7  : uitofp of %c lane 1 (i8)
; Per the assertions below, the SSE and AVX runs are expected to keep all
; eight conversions scalar. The SLM and AVX512 runs are expected to vectorize
; only the i32 part into a <4 x i32> sitofp and a <4 x i32> uitofp, taking
; lanes 0-1 from the former and lanes 2-3 from the latter, while the i16 and
; i8 conversions stay scalar.
define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) {
; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; SSE-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
; SSE-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
; SSE-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float
; SSE-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float
; SSE-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float
; SSE-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float
; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; SSE-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
; SSE-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
; SSE-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SSE-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; SLM-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
; SLM-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
; SLM-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
; SLM-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
; SLM-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
; AVX-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; AVX-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
; AVX-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float
; AVX-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float
; AVX-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float
; AVX-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float
; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
; AVX512-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3
; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; AVX512-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <8 x i16> %b, i32 0
%b1 = extractelement <8 x i16> %b, i32 1
%c0 = extractelement <16 x i8> %c, i32 0
%c1 = extractelement <16 x i8> %c, i32 1
%ab0 = sitofp i32 %a0 to float
%ab1 = sitofp i32 %a1 to float
%ab2 = uitofp i32 %a2 to float
%ab3 = uitofp i32 %a3 to float
%ab4 = sitofp i16 %b0 to float
%ab5 = uitofp i16 %b1 to float
%ab6 = sitofp i8 %c0 to float
%ab7 = uitofp i8 %c1 to float
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}

View File

@ -0,0 +1,179 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; Alternating-opcode FP test: lanes 0, 3, 4 and 7 compute %a + %b with fadd,
; lanes 1, 2, 5 and 6 compute %a - %b with fsub, and the results are inserted
; into an <8 x float> that starts from poison.
; The shared assertions below expect every run to produce a full-width fadd, a
; full-width fsub and a shufflevector whose mask <0, 9, 10, 3, 4, 13, 14, 7>
; picks the fadd result for lanes 0/3/4/7 and the fsub result for lanes
; 1/2/5/6.
define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%b0 = extractelement <8 x float> %b, i32 0
%b1 = extractelement <8 x float> %b, i32 1
%b2 = extractelement <8 x float> %b, i32 2
%b3 = extractelement <8 x float> %b, i32 3
%b4 = extractelement <8 x float> %b, i32 4
%b5 = extractelement <8 x float> %b, i32 5
%b6 = extractelement <8 x float> %b, i32 6
%b7 = extractelement <8 x float> %b, i32 7
%ab0 = fadd float %a0, %b0
%ab1 = fsub float %a1, %b1
%ab2 = fsub float %a2, %b2
%ab3 = fadd float %a3, %b3
%ab4 = fadd float %a4, %b4
%ab5 = fsub float %a5, %b5
%ab6 = fsub float %a6, %b6
%ab7 = fadd float %a7, %b7
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}
; Alternating-opcode FP test: lanes 0, 3, 4 and 7 compute %a * %b with fmul,
; lanes 1, 2, 5 and 6 compute %a / %b with fdiv, and the results are inserted
; into an <8 x float> that starts from poison.
; Per the assertions below, the SSE, AVX and AVX512 runs are expected to
; produce a full-width fmul, a full-width fdiv and a shufflevector with mask
; <0, 9, 10, 3, 4, 13, 14, 7>; the SLM run is expected to leave the scalar
; code unchanged.
define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; SSE-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
; SLM-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
; SLM-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
; SLM-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
; SLM-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], [[B0]]
; SLM-NEXT: [[AB1:%.*]] = fdiv float [[A1]], [[B1]]
; SLM-NEXT: [[AB2:%.*]] = fdiv float [[A2]], [[B2]]
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], [[B3]]
; SLM-NEXT: [[AB4:%.*]] = fmul float [[A4]], [[B4]]
; SLM-NEXT: [[AB5:%.*]] = fdiv float [[A5]], [[B5]]
; SLM-NEXT: [[AB6:%.*]] = fdiv float [[A6]], [[B6]]
; SLM-NEXT: [[AB7:%.*]] = fmul float [[A7]], [[B7]]
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[R7]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%b0 = extractelement <8 x float> %b, i32 0
%b1 = extractelement <8 x float> %b, i32 1
%b2 = extractelement <8 x float> %b, i32 2
%b3 = extractelement <8 x float> %b, i32 3
%b4 = extractelement <8 x float> %b, i32 4
%b5 = extractelement <8 x float> %b, i32 5
%b6 = extractelement <8 x float> %b, i32 6
%b7 = extractelement <8 x float> %b, i32 7
%ab0 = fmul float %a0, %b0
%ab1 = fdiv float %a1, %b1
%ab2 = fdiv float %a2, %b2
%ab3 = fmul float %a3, %b3
%ab4 = fmul float %a4, %b4
%ab5 = fdiv float %a5, %b5
%ab6 = fdiv float %a6, %b6
%ab7 = fmul float %a7, %b7
%r0 = insertelement <8 x float> poison, float %ab0, i32 0
%r1 = insertelement <8 x float> %r0, float %ab1, i32 1
%r2 = insertelement <8 x float> %r1, float %ab2, i32 2
%r3 = insertelement <8 x float> %r2, float %ab3, i32 3
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7
}
; fmul/fdiv by constants on a <4 x float>: lane 0 is multiplied by 2.0, lane 1
; is multiplied by 1.0, lane 2 is divided by 1.0 and lane 3 is divided by 0.5.
; The results are inserted into a <4 x float> that starts from poison.
; Per the assertions below, the SSE, AVX and AVX512 runs are expected to
; collapse everything into one vector fmul by <2.0, 1.0, 1.0, 2.0>. The SLM
; run is expected to stay scalar: lanes 1 and 2 are inserted unchanged (the
; operations by 1.0 disappear) and the divide by 0.5 in lane 3 appears as a
; multiply by 2.0.
define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; SSE-LABEL: @fmul_fdiv_v4f32_const(
; SSE-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; SSE-NEXT: ret <4 x float> [[TMP1]]
;
; SLM-LABEL: @fmul_fdiv_v4f32_const(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], 2.000000e+00
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> poison, float [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[A1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: ret <4 x float> [[R3]]
;
; AVX-LABEL: @fmul_fdiv_v4f32_const(
; AVX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX-NEXT: ret <4 x float> [[TMP1]]
;
; AVX512-LABEL: @fmul_fdiv_v4f32_const(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX512-NEXT: ret <4 x float> [[TMP1]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%ab0 = fmul float %a0, 2.0
%ab1 = fmul float %a1, 1.0
%ab2 = fdiv float %a2, 1.0
%ab3 = fdiv float %a3, 0.5
%r0 = insertelement <4 x float> poison, float %ab0, i32 0
%r1 = insertelement <4 x float> %r0, float %ab1, i32 1
%r2 = insertelement <4 x float> %r1, float %ab2, i32 2
%r3 = insertelement <4 x float> %r2, float %ab3, i32 3
ret <4 x float> %r3
}

View File

@ -0,0 +1,497 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; Alternating opcodes over <8 x i32>: lanes 0-3 are scalar adds and lanes 4-7
; are scalar subs of the matching elements of %a and %b.
; The shared checks expect one vector add, one vector sub, and a shufflevector
; that takes lanes 0-3 from the add and lanes 4-7 from the sub (mask 12-15).
define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: @add_sub_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%b0 = extractelement <8 x i32> %b, i32 0
%b1 = extractelement <8 x i32> %b, i32 1
%b2 = extractelement <8 x i32> %b, i32 2
%b3 = extractelement <8 x i32> %b, i32 3
%b4 = extractelement <8 x i32> %b, i32 4
%b5 = extractelement <8 x i32> %b, i32 5
%b6 = extractelement <8 x i32> %b, i32 6
%b7 = extractelement <8 x i32> %b, i32 7
%ab0 = add i32 %a0, %b0
%ab1 = add i32 %a1, %b1
%ab2 = add i32 %a2, %b2
%ab3 = add i32 %a3, %b3
%ab4 = sub i32 %a4, %b4
%ab5 = sub i32 %a5, %b5
%ab6 = sub i32 %a6, %b6
%ab7 = sub i32 %a7, %b7
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Alternating opcodes over <4 x i32>: lanes 0-1 are scalar adds and lanes 2-3
; are scalar ands of the matching elements of %a and %b.
; The shared checks expect a vector add and a vector and, blended with the
; shuffle mask <0, 1, 6, 7>.
define <4 x i32> @add_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @add_and_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A]], [[B]]
; CHECK-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[R3]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <4 x i32> %b, i32 0
%b1 = extractelement <4 x i32> %b, i32 1
%b2 = extractelement <4 x i32> %b, i32 2
%b3 = extractelement <4 x i32> %b, i32 3
%ab0 = add i32 %a0, %b0
%ab1 = add i32 %a1, %b1
%ab2 = and i32 %a2, %b2
%ab3 = and i32 %a3, %b3
%r0 = insertelement <4 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
ret <4 x i32> %r3
}
; Interleaved opcodes over <4 x i32>: lanes 0 and 3 are scalar muls, lanes 1
; and 2 are scalar adds of the matching elements of %a and %b.
; The shared checks expect a vector mul and a vector add, blended with the
; shuffle mask <0, 5, 6, 3> (outer lanes from the mul, inner lanes from the add).
define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @add_mul_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
; CHECK-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[R3]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <4 x i32> %b, i32 0
%b1 = extractelement <4 x i32> %b, i32 1
%b2 = extractelement <4 x i32> %b, i32 2
%b3 = extractelement <4 x i32> %b, i32 3
%ab0 = mul i32 %a0, %b0
%ab1 = add i32 %a1, %b1
%ab2 = add i32 %a2, %b2
%ab3 = mul i32 %a3, %b3
%r0 = insertelement <4 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
ret <4 x i32> %r3
}
; Alternating shifts with variable amounts: lanes 0-3 are scalar ashr and lanes
; 4-7 are scalar shl of %a by the matching element of %b.
; The SSE, AVX2 and AVX512 expectations are a vector ashr and a vector shl
; blended by a shuffle (lanes 0-3 from ashr, 4-7 from shl). The AVX1
; expectation keeps the four ashr operations scalar and only uses a vector shl,
; blending the scalar insert chain with the shl result.
define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_shl_v8i32(
; SSE-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; AVX1-LABEL: @ashr_shl_v8i32(
; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; AVX1-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; AVX1-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; AVX1-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; AVX1-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; AVX1-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
; AVX1-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
; AVX1-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; AVX1-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; AVX1-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
; AVX1-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
; AVX1-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; AVX1-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; AVX1-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; AVX1-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[R3]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: ret <8 x i32> [[R7]]
;
; AVX2-LABEL: @ashr_shl_v8i32(
; AVX2-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX2-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @ashr_shl_v8i32(
; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%b0 = extractelement <8 x i32> %b, i32 0
%b1 = extractelement <8 x i32> %b, i32 1
%b2 = extractelement <8 x i32> %b, i32 2
%b3 = extractelement <8 x i32> %b, i32 3
%b4 = extractelement <8 x i32> %b, i32 4
%b5 = extractelement <8 x i32> %b, i32 5
%b6 = extractelement <8 x i32> %b, i32 6
%b7 = extractelement <8 x i32> %b, i32 7
%ab0 = ashr i32 %a0, %b0
%ab1 = ashr i32 %a1, %b1
%ab2 = ashr i32 %a2, %b2
%ab3 = ashr i32 %a3, %b3
%ab4 = shl i32 %a4, %b4
%ab5 = shl i32 %a5, %b5
%ab6 = shl i32 %a6, %b6
%ab7 = shl i32 %a7, %b7
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Alternating shifts with constant amounts: lanes 0-3 are ashr by 2 and lanes
; 4-7 are shl by 3.
; The SSE and AVX1 expectations split %a into two <4 x i32> halves, shift each
; half with a splat constant, and concatenate the results. The AVX2 and AVX512
; expectations use two full-width <8 x i32> shifts by <2,2,2,2,3,3,3,3> and
; blend them (lanes 0-3 from ashr, 4-7 from shl).
define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
; SSE-LABEL: @ashr_shl_v8i32_const(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
; SSE-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; AVX1-LABEL: @ashr_shl_v8i32_const(
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX1-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; AVX1-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX1-NEXT: ret <8 x i32> [[R7]]
;
; AVX2-LABEL: @ashr_shl_v8i32_const(
; AVX2-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @ashr_shl_v8i32_const(
; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%ab0 = ashr i32 %a0, 2
%ab1 = ashr i32 %a1, 2
%ab2 = ashr i32 %a2, 2
%ab3 = ashr i32 %a3, 2
%ab4 = shl i32 %a4, 3
%ab5 = shl i32 %a5, 3
%ab6 = shl i32 %a6, 3
%ab7 = shl i32 %a7, 3
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Three different shift opcodes in one <8 x i32> build: lanes 0-1 are ashr,
; lanes 2-5 are lshr and lanes 6-7 are shl, all with variable amounts from %b.
; None of the expectations is a single blended vector sequence:
;  - SSE and AVX1 keep the ashr and shl lanes scalar and take lanes 2-5 from a
;    full-width vector lshr via extractelement/insertelement.
;  - AVX2 and AVX512 take lanes 0-1 from a <4 x i32> ashr, lanes 2-3 from a
;    <4 x i32> lshr, lanes 4-5 from an <8 x i32> lshr, and keep shl scalar.
define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_lshr_shl_v8i32(
; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SSE-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; AVX1-LABEL: @ashr_lshr_shl_v8i32(
; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; AVX1-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; AVX1-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; AVX1-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
; AVX1-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; AVX1-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; AVX1-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; AVX1-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
; AVX1-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; AVX1-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[AB0]], i32 0
; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; AVX1-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
; AVX1-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
; AVX1-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
; AVX1-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
; AVX1-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
; AVX1-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
; AVX1-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; AVX1-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; AVX1-NEXT: ret <8 x i32> [[R7]]
;
; AVX2-LABEL: @ashr_lshr_shl_v8i32(
; AVX2-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
; AVX2-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; AVX2-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
; AVX2-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
; AVX2-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
; AVX2-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; AVX2-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP6]], i32 0
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
; AVX2-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
; AVX2-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
; AVX2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
; AVX2-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; AVX2-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; AVX2-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @ashr_lshr_shl_v8i32(
; AVX512-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
; AVX512-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; AVX512-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
; AVX512-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
; AVX512-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; AVX512-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP6]], i32 0
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
; AVX512-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
; AVX512-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
; AVX512-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
; AVX512-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; AVX512-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%b0 = extractelement <8 x i32> %b, i32 0
%b1 = extractelement <8 x i32> %b, i32 1
%b2 = extractelement <8 x i32> %b, i32 2
%b3 = extractelement <8 x i32> %b, i32 3
%b4 = extractelement <8 x i32> %b, i32 4
%b5 = extractelement <8 x i32> %b, i32 5
%b6 = extractelement <8 x i32> %b, i32 6
%b7 = extractelement <8 x i32> %b, i32 7
%ab0 = ashr i32 %a0, %b0
%ab1 = ashr i32 %a1, %b1
%ab2 = lshr i32 %a2, %b2
%ab3 = lshr i32 %a3, %b3
%ab4 = lshr i32 %a4, %b4
%ab5 = lshr i32 %a5, %b5
%ab6 = shl i32 %a6, %b6
%ab7 = shl i32 %a7, %b7
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Same opcode (add) in every lane, but lanes 0 and 4 use an undef right-hand
; operand while the others add the constants 4, 8 and 16.
; The shared checks expect a single vector add whose constant operand keeps
; undef in lanes 0 and 4.
define <8 x i32> @add_v8i32_undefs(<8 x i32> %a) {
; CHECK-LABEL: @add_v8i32_undefs(
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], <i32 undef, i32 4, i32 8, i32 16, i32 undef, i32 4, i32 8, i32 16>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%ab0 = add i32 %a0, undef
%ab1 = add i32 %a1, 4
%ab2 = add i32 %a2, 8
%ab3 = add i32 %a3, 16
%ab4 = add i32 %a4, undef
%ab5 = add i32 %a5, 4
%ab6 = add i32 %a6, 8
%ab7 = add i32 %a7, 16
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Division counterpart of the add-with-undef test: every lane is an sdiv, with
; an undef divisor in lanes 0 and 4 and the constants 4, 8 and 16 elsewhere.
; The shared checks expect no vector sdiv: the six lanes with constant divisors
; stay as scalar sdivs, nothing is computed or inserted for lanes 0 and 4, and
; the expected insert chain begins from an undef vector at lane 1.
define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
; CHECK-LABEL: @sdiv_v8i32_undefs(
; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; CHECK-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4
; CHECK-NEXT: [[AB2:%.*]] = sdiv i32 [[A2]], 8
; CHECK-NEXT: [[AB3:%.*]] = sdiv i32 [[A3]], 16
; CHECK-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4
; CHECK-NEXT: [[AB6:%.*]] = sdiv i32 [[A6]], 8
; CHECK-NEXT: [[AB7:%.*]] = sdiv i32 [[A7]], 16
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[AB1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[AB2]], i32 2
; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[AB3]], i32 3
; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%ab0 = sdiv i32 %a0, undef
%ab1 = sdiv i32 %a1, 4
%ab2 = sdiv i32 %a2, 8
%ab3 = sdiv i32 %a3, 16
%ab4 = sdiv i32 %a4, undef
%ab5 = sdiv i32 %a5, 4
%ab6 = sdiv i32 %a6, 8
%ab7 = sdiv i32 %a7, 16
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}
; Alternating add/sub where one operand is the scalar %b in every lane.
; Lanes 0-3 add %b to the element of %a (operand order alternates between
; "%aN, %b" and "%b, %aN"); lanes 4-7 compute %b minus the element of %a.
; The shared checks expect %b to be splatted (insertelement + zero-mask
; shuffle), followed by a vector add and a vector sub with the splat as the
; left operand, blended so lanes 0-3 come from the add and 4-7 from the sub.
define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
; CHECK-LABEL: @add_sub_v8i32_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%ab0 = add i32 %a0, %b
%ab1 = add i32 %b, %a1
%ab2 = add i32 %a2, %b
%ab3 = add i32 %b, %a3
%ab4 = sub i32 %b, %a4
%ab5 = sub i32 %b, %a5
%ab6 = sub i32 %b, %a6
%ab7 = sub i32 %b, %a7
%r0 = insertelement <8 x i32> poison, i32 %ab0, i32 0
%r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
%r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
%r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
%r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,200 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 -instcombine | FileCheck %s
; Squares element 0 of %x and element 1 of %y and packs the two products into
; a <2 x i8> result. The operands come from two different source vectors.
; The expected output is the same scalar sequence as the input (no vector mul).
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]], i32 1
; CHECK-NEXT: ret <2 x i8> [[INS2]]
;
%x0 = extractelement <2 x i8> %x, i32 0
%y1 = extractelement <2 x i8> %y, i32 1
%x0x0 = mul i8 %x0, %x0
%y1y1 = mul i8 %y1, %y1
%ins1 = insertelement <2 x i8> poison, i8 %x0x0, i32 0
%ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
ret <2 x i8> %ins2
}
; Four-lane version of the mixed-source squaring test: squares x[0], x[3], y[1]
; and y[2] and inserts the products into result lanes 0, 1, 2 and 3 in that
; order, so the source lane indices do not match the destination lanes.
; The expected output is the same scalar sequence as the input.
define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
ret <4 x i8> %ins4
}
; Variant of @h in which the first scalar is extracted from an undef vector
; instead of from %x; the remaining three lanes are unchanged.
; The expected output has no computation for lane 0: the insert chain starts
; from an undef vector at lane 1 and the other three products stay scalar.
define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h_undef(
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
;
%x0 = extractelement <4 x i8> undef, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
%ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
%ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
%ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
ret <4 x i8> %ins4
}
; Scalar-result variant: squares x[0], x[3], y[1] and y[2], then sums the four
; products with a two-level add tree and returns the i8 total.
; The expected output is the same scalar sequence as the input.
define i8 @i(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @i(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[TMP3]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %y1y1, %y2y2
%3 = add i8 %1, %2
ret i8 %3
}
; Same squaring of x[0], x[3], y[1] and y[2] as @i, but the two partial sums
; are combined with sdiv instead of add, so the final step is not a reduction
; over a single opcode.
; The expected output is the same scalar sequence as the input.
define i8 @j(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @j(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[TMP3]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %y1y1, %y2y2
%3 = sdiv i8 %1, %2
ret i8 %3
}
; Single-source variant of @j: all four squared elements come from %x
; (extracted in the order 0, 3, 1, 2), the products are summed pairwise as
; (x0^2 + x3^2) and (x1^2 + x2^2), and the first sum is sdiv'ed by the second.
; The expected output is the same scalar sequence as the input.
define i8 @k(<4 x i8> %x) {
; CHECK-LABEL: @k(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[TMP3]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%x1x1 = mul i8 %x1, %x1
%x2x2 = mul i8 %x2, %x2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %x1x1, %x2x2
%3 = sdiv i8 %1, %2
ret i8 %3
}
; Same computation as @k, but split across basic blocks: element 0 of %x is
; extracted in the entry block, then an unconditional branch leads to bb1
; where the other three extracts and all arithmetic live.
; The expected output keeps the block structure and the scalar sequence.
define i8 @k_bb(<4 x i8> %x) {
; CHECK-LABEL: @k_bb(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[TMP3]]
;
%x0 = extractelement <4 x i8> %x, i32 0
br label %bb1
bb1:
%x3 = extractelement <4 x i8> %x, i32 3
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%x1x1 = mul i8 %x1, %x1
%x2x2 = mul i8 %x2, %x2
%1 = add i8 %x0x0, %x3x3
%2 = add i8 %x1x1, %x2x2
%3 = sdiv i8 %1, %2
ret i8 %3
}

View File

@ -0,0 +1,283 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s
;
; Check that we can commute operands based on the predicate.
;
; Lane-wise `icmp eq` between the four lanes of %a and four consecutive i32
; values loaded from %b, sign-extended to <4 x i32>.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The assertions expect a single <4 x i32> load of %b, one vector `icmp eq`
; against %a, and the sext, with no scalar extract/insert chain remaining.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_eq_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds i32, i32* %b, i32 0
%p1 = getelementptr inbounds i32, i32* %b, i32 1
%p2 = getelementptr inbounds i32, i32* %b, i32 2
%p3 = getelementptr inbounds i32, i32* %b, i32 3
%b0 = load i32, i32* %p0, align 4
%b1 = load i32, i32* %p1, align 4
%b2 = load i32, i32* %p2, align 4
%b3 = load i32, i32* %p3, align 4
; Operand order is reversed in lanes 1 and 2 relative to lanes 0 and 3.
%c0 = icmp eq i32 %a0, %b0
%c1 = icmp eq i32 %b1, %a1
%c2 = icmp eq i32 %b2, %a2
%c3 = icmp eq i32 %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Lane-wise `icmp ne` between the four lanes of %a and four consecutive i32
; values loaded from %b, sign-extended to <4 x i32>.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The assertions expect a single <4 x i32> load of %b, one vector `icmp ne`
; against %a, and the sext.
define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_ne_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds i32, i32* %b, i32 0
%p1 = getelementptr inbounds i32, i32* %b, i32 1
%p2 = getelementptr inbounds i32, i32* %b, i32 2
%p3 = getelementptr inbounds i32, i32* %b, i32 3
%b0 = load i32, i32* %p0, align 4
%b1 = load i32, i32* %p1, align 4
%b2 = load i32, i32* %p2, align 4
%b3 = load i32, i32* %p3, align 4
; Operand order is reversed in lanes 1 and 2 relative to lanes 0 and 3.
%c0 = icmp ne i32 %a0, %b0
%c1 = icmp ne i32 %b1, %a1
%c2 = icmp ne i32 %b2, %a2
%c3 = icmp ne i32 %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Lane-wise `fcmp oeq` between the four lanes of %a and four consecutive
; floats loaded from %b, sign-extended to <4 x i32>.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The assertions expect a single <4 x float> load of %b, one vector
; `fcmp oeq` against %a, and the sext.
define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_oeq_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds float, float* %b, i32 0
%p1 = getelementptr inbounds float, float* %b, i32 1
%p2 = getelementptr inbounds float, float* %b, i32 2
%p3 = getelementptr inbounds float, float* %b, i32 3
%b0 = load float, float* %p0, align 4
%b1 = load float, float* %p1, align 4
%b2 = load float, float* %p2, align 4
%b3 = load float, float* %p3, align 4
; Operand order is reversed in lanes 1 and 2 relative to lanes 0 and 3.
%c0 = fcmp oeq float %a0, %b0
%c1 = fcmp oeq float %b1, %a1
%c2 = fcmp oeq float %b2, %a2
%c3 = fcmp oeq float %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Lane-wise `fcmp uno` between the four lanes of %a and four consecutive
; floats loaded from %b, sign-extended to <4 x i32>.
; Every lane uses the same predicate; lanes 0 and 3 are written as (a, b) and
; lanes 1 and 2 as (b, a).
; The assertions expect a single <4 x float> load of %b, one vector
; `fcmp uno` against %a, and the sext.
define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_uno_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds float, float* %b, i32 0
%p1 = getelementptr inbounds float, float* %b, i32 1
%p2 = getelementptr inbounds float, float* %b, i32 2
%p3 = getelementptr inbounds float, float* %b, i32 3
%b0 = load float, float* %p0, align 4
%b1 = load float, float* %p1, align 4
%b2 = load float, float* %p2, align 4
%b3 = load float, float* %p3, align 4
; Operand order is reversed in lanes 1 and 2 relative to lanes 0 and 3.
%c0 = fcmp uno float %a0, %b0
%c1 = fcmp uno float %b1, %a1
%c2 = fcmp uno float %b2, %a2
%c3 = fcmp uno float %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
;
; Check that we can commute operands by swapping the predicate.
;
; Mixed-predicate case: lanes 0 and 3 compute `icmp sgt a, b` while lanes 1
; and 2 compute `icmp slt b, a`, which is the same comparison with operands
; and predicate both swapped.
; The assertions expect a single <4 x i32> load of %b and one vector
; `icmp slt` with the loaded vector on the left and %a on the right.
define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_sgt_slt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds i32, i32* %b, i32 0
%p1 = getelementptr inbounds i32, i32* %b, i32 1
%p2 = getelementptr inbounds i32, i32* %b, i32 2
%p3 = getelementptr inbounds i32, i32* %b, i32 3
%b0 = load i32, i32* %p0, align 4
%b1 = load i32, i32* %p1, align 4
%b2 = load i32, i32* %p2, align 4
%b3 = load i32, i32* %p3, align 4
; sgt(a, b) in lanes 0 and 3; slt(b, a) in lanes 1 and 2.
%c0 = icmp sgt i32 %a0, %b0
%c1 = icmp slt i32 %b1, %a1
%c2 = icmp slt i32 %b2, %a2
%c3 = icmp sgt i32 %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Mixed-predicate case: lanes 0 and 3 compute `icmp uge a, b` while lanes 1
; and 2 compute `icmp ule b, a`, which is the same comparison with operands
; and predicate both swapped.
; The assertions expect a single <4 x i32> load of %b and one vector
; `icmp ule` with the loaded vector on the left and %a on the right.
define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_uge_ule_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds i32, i32* %b, i32 0
%p1 = getelementptr inbounds i32, i32* %b, i32 1
%p2 = getelementptr inbounds i32, i32* %b, i32 2
%p3 = getelementptr inbounds i32, i32* %b, i32 3
%b0 = load i32, i32* %p0, align 4
%b1 = load i32, i32* %p1, align 4
%b2 = load i32, i32* %p2, align 4
%b3 = load i32, i32* %p3, align 4
; uge(a, b) in lanes 0 and 3; ule(b, a) in lanes 1 and 2.
%c0 = icmp uge i32 %a0, %b0
%c1 = icmp ule i32 %b1, %a1
%c2 = icmp ule i32 %b2, %a2
%c3 = icmp uge i32 %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Mixed-predicate case: lanes 0 and 3 compute `fcmp ogt a, b` while lanes 1
; and 2 compute `fcmp olt b, a`, which is the same comparison with operands
; and predicate both swapped.
; The assertions expect a single <4 x float> load of %b and one vector
; `fcmp olt` with the loaded vector on the left and %a on the right.
define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds float, float* %b, i32 0
%p1 = getelementptr inbounds float, float* %b, i32 1
%p2 = getelementptr inbounds float, float* %b, i32 2
%p3 = getelementptr inbounds float, float* %b, i32 3
%b0 = load float, float* %p0, align 4
%b1 = load float, float* %p1, align 4
%b2 = load float, float* %p2, align 4
%b3 = load float, float* %p3, align 4
; ogt(a, b) in lanes 0 and 3; olt(b, a) in lanes 1 and 2.
%c0 = fcmp ogt float %a0, %b0
%c1 = fcmp olt float %b1, %a1
%c2 = fcmp olt float %b2, %a2
%c3 = fcmp ogt float %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}
; Negative case: lanes 0 and 3 compute `fcmp ord a, b` while lanes 1 and 2
; compute `fcmp uno b, a`. `ord` and `uno` are complementary predicates, not
; operand-swapped forms of each other, so reversing the operands does not make
; the four lanes agree on one predicate.
; The assertions expect all four compares and the insertelement chain to stay
; scalar. The only visible differences from the input are that the
; getelementptr indices are i64 and the lane-0 load reads %b directly (there
; is no zero-index getelementptr in the expected output).
define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_ord_uno_v4i32(
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
; CHECK-NEXT: [[B1:%.*]] = load float, float* [[P1]], align 4
; CHECK-NEXT: [[B2:%.*]] = load float, float* [[P2]], align 4
; CHECK-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
; CHECK-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
; CHECK-NEXT: [[C1:%.*]] = fcmp uno float [[B1]], [[A1]]
; CHECK-NEXT: [[C2:%.*]] = fcmp uno float [[B2]], [[A2]]
; CHECK-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
; CHECK-NEXT: [[D0:%.*]] = insertelement <4 x i1> poison, i1 [[C0]], i32 0
; CHECK-NEXT: [[D1:%.*]] = insertelement <4 x i1> [[D0]], i1 [[C1]], i32 1
; CHECK-NEXT: [[D2:%.*]] = insertelement <4 x i1> [[D1]], i1 [[C2]], i32 2
; CHECK-NEXT: [[D3:%.*]] = insertelement <4 x i1> [[D2]], i1 [[C3]], i32 3
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
; Scalarize %a.
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
; Four consecutive element addresses starting at %b.
%p0 = getelementptr inbounds float, float* %b, i32 0
%p1 = getelementptr inbounds float, float* %b, i32 1
%p2 = getelementptr inbounds float, float* %b, i32 2
%p3 = getelementptr inbounds float, float* %b, i32 3
%b0 = load float, float* %p0, align 4
%b1 = load float, float* %p1, align 4
%b2 = load float, float* %p2, align 4
%b3 = load float, float* %p3, align 4
; ord(a, b) in lanes 0 and 3; uno(b, a) in lanes 1 and 2.
%c0 = fcmp ord float %a0, %b0
%c1 = fcmp uno float %b1, %a1
%c2 = fcmp uno float %b2, %a2
%c3 = fcmp ord float %a3, %b3
; Rebuild a <4 x i1> from the scalar results, starting from a poison vector.
%d0 = insertelement <4 x i1> poison, i1 %c0, i32 0
%d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
%d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
%d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
%r = sext <4 x i1> %d3 to <4 x i32>
ret <4 x i32> %r
}

View File

@ -0,0 +1,81 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin13.3.0"
; Crash regression test for the SLP vectorizer's block scheduling (see the
; comment in bb1 below).
; Shape of the function: bb1 precomputes three products and one sum from the
; double arguments; for.body then iterates until the induction variable
; reaches 256. Each iteration wraps two running doubles in lane 0 of a
; <2 x double> (the other lane is poison), passes each to @_xfn, stores the
; i32 results into the two stack arrays, and advances the running doubles by
; %mul19 and %mul21.
; The assertions expect the same scalar instruction sequence as the input.
define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-LABEL: @_foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TAB1:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, [[TBAA0:!tbaa !.*]]
; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, [[TBAA0]]
; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
; Two 256-entry i32 stack arrays, one per call result stream.
%tab1 = alloca [256 x i32], align 16
%tab2 = alloca [256 x i32], align 16
br label %bb1
bb1:
; Loop-invariant values: per-iteration steps (%mul19, %mul21) and the initial
; value of the second running double (%add).
%mul19 = fmul double %p1, 1.638400e+04
%mul20 = fmul double %p3, 1.638400e+04
%add = fadd double %mul20, 8.192000e+03
%mul21 = fmul double %p2, 1.638400e+04
; The SLPVectorizer crashed when scheduling this block after it inserted an
; insertelement instruction (during vectorizing the for.body block) at this position.
br label %for.body
for.body:
; Induction variable and the two running doubles carried around the loop.
%indvars.iv266 = phi i64 [ 0, %bb1 ], [ %indvars.iv.next267, %for.body ]
%t.0259 = phi double [ 0.000000e+00, %bb1 ], [ %add27, %for.body ]
%p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ]
; First stream: wrap %t.0259 in lane 0, call @_xfn, store into %tab1[iv].
%vecinit.i.i237 = insertelement <2 x double> poison, double %t.0259, i32 0
%x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2
%arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %tab1, i64 0, i64 %indvars.iv266
store i32 %x13, i32* %arrayidx, align 4, !tbaa !4
; Second stream: same pattern with %p3.addr.0258 and %tab2[iv].
%vecinit.i.i = insertelement <2 x double> poison, double %p3.addr.0258, i32 0
%x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2
%arrayidx26 = getelementptr inbounds [256 x i32], [256 x i32]* %tab2, i64 0, i64 %indvars.iv266
store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4
; Advance both running doubles and the induction variable; exit after 256
; iterations.
%add27 = fadd double %mul19, %t.0259
%add28 = fadd double %mul21, %p3.addr.0258
%indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1
%exitcond = icmp eq i64 %indvars.iv.next267, 256
br i1 %exitcond, label %return, label %for.body
return:
ret void
}
; External callee used by @_foo: takes a <2 x double> and returns an i32.
; NOTE(review): attribute groups #0, #2 and #4 are referenced in this test but
; no `attributes` definitions are visible here - confirm that is intended.
declare i32 @_xfn(<2 x double>) #4
; TBAA metadata for the i32 stores in @_foo. !4 is the access tag (base type
; and access type are both !3, offset 0); !3 is the "int" type node whose
; parent is !5 ("omnipotent char"), whose parent is the root !6.
!3 = !{!"int", !5, i64 0}
!4 = !{!3, !3, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}

View File

@ -0,0 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s
; External 20 x 13 array of i32; @hoge reads columns 5..8 of row %idx from it.
@array = external global [20 x [13 x i32]]
; Builds a <4 x i32> from four adjacent elements of row %idx of @array and
; stores it to %sink. The elements are adjacent in memory (columns 5..8) but
; are inserted in rotated order: columns 6, 7, 8 go to lanes 0, 1, 2 and
; column 5 goes to lane 3.
; The assertions expect one <4 x i32> load starting at column 5, a
; shufflevector with mask <1, 2, 3, 0> to apply the rotation, and then an
; extractelement/insertelement chain that rebuilds the stored vector from the
; shuffled value.
define void @hoge(i64 %idx, <4 x i32>* %sink) {
; CHECK-LABEL: @hoge(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 6
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 7
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i32 3
; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: ret void
;
bb:
; Addresses of @array[%idx][5..8], in ascending column order.
%0 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 5
%1 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 6
%2 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 7
%3 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 8
; Loads are issued in the order columns 6, 7, 8, 5 and fill lanes 0..3.
%4 = load i32, i32* %1, align 4
%5 = insertelement <4 x i32> poison, i32 %4, i32 0
%6 = load i32, i32* %2, align 4
%7 = insertelement <4 x i32> %5, i32 %6, i32 1
%8 = load i32, i32* %3, align 4
%9 = insertelement <4 x i32> %7, i32 %8, i32 2
%10 = load i32, i32* %0, align 4
%11 = insertelement <4 x i32> %9, i32 %10, i32 3
; No explicit alignment on the store; the expected output shows align 16.
store <4 x i32> %11, <4 x i32>* %sink
ret void
}

View File

@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux -mcpu=bdver2 -slp-schedule-budget=1 | FileCheck %s
; Squares lane 0 of %x and lane 1 of %y and packs the two products into a
; <2 x i8> built from a poison placeholder.
; This file is run with -slp-schedule-budget=1; the assertions expect the
; output to be identical to the scalar input.
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]], i32 1
; CHECK-NEXT: ret <2 x i8> [[INS2]]
;
; The two scalars come from different source vectors and different lanes.
%x0 = extractelement <2 x i8> %x, i32 0
%y1 = extractelement <2 x i8> %y, i32 1
%x0x0 = mul i8 %x0, %x0
%y1y1 = mul i8 %y1, %y1
; Pack the products into lanes 0 and 1.
%ins1 = insertelement <2 x i8> poison, i8 %x0x0, i32 0
%ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1
ret <2 x i8> %ins2
}

View File

@ -0,0 +1,534 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256DQ
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Zero-initialized, 64-byte-aligned buffers shared by the conversion tests in
; this file. Each buffer is 64 bytes in total (8 x 8, 16 x 4, 32 x 2 or
; 64 x 1 bytes).
; @src64 is the double source; @dst64, @dst32 and @dst16 receive the i64, i32
; and i16 results in the functions visible below.
; NOTE(review): @src32 and @dst8 are presumably used by the float-source and
; i8-destination tests further down the file - not visible here.
@src64 = common global [8 x double] zeroinitializer, align 64
@src32 = common global [16 x float] zeroinitializer, align 64
@dst64 = common global [8 x i64] zeroinitializer, align 64
@dst32 = common global [16 x i32] zeroinitializer, align 64
@dst16 = common global [32 x i16] zeroinitializer, align 64
@dst8 = common global [64 x i8] zeroinitializer, align 64
;
; FPTOSI vXf64
;
; Converts the eight doubles in @src64 to i64 with fptosi and stores them to
; the matching slots of @dst64.
; Expected output per check prefix:
;  - SSE and AVX256NODQ prefixes - unchanged, eight scalar load/fptosi/store.
;  - AVX512 prefix - one <8 x double> load, one vector fptosi, one store.
;  - AVX256DQ prefix - two <4 x double> halves (elements 0..3 and 4..7).
define void @fptosi_8f64_8i64() #0 {
; SSE-LABEL: @fptosi_8f64_8i64(
; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; SSE-NEXT: [[CVT0:%.*]] = fptosi double [[A0]] to i64
; SSE-NEXT: [[CVT1:%.*]] = fptosi double [[A1]] to i64
; SSE-NEXT: [[CVT2:%.*]] = fptosi double [[A2]] to i64
; SSE-NEXT: [[CVT3:%.*]] = fptosi double [[A3]] to i64
; SSE-NEXT: [[CVT4:%.*]] = fptosi double [[A4]] to i64
; SSE-NEXT: [[CVT5:%.*]] = fptosi double [[A5]] to i64
; SSE-NEXT: [[CVT6:%.*]] = fptosi double [[A6]] to i64
; SSE-NEXT: [[CVT7:%.*]] = fptosi double [[A7]] to i64
; SSE-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
; SSE-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
; SSE-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
; SSE-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
; SSE-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @fptosi_8f64_8i64(
; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptosi double [[A0]] to i64
; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptosi double [[A1]] to i64
; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptosi double [[A2]] to i64
; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptosi double [[A3]] to i64
; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptosi double [[A4]] to i64
; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptosi double [[A5]] to i64
; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptosi double [[A6]] to i64
; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptosi double [[A7]] to i64
; AVX256NODQ-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
; AVX256NODQ-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @fptosi_8f64_8i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
; AVX512-NEXT: [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i64>
; AVX512-NEXT: store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @fptosi_8f64_8i64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
; AVX256DQ-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
; AVX256DQ-NEXT: [[TMP3:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i64>
; AVX256DQ-NEXT: [[TMP4:%.*]] = fptosi <4 x double> [[TMP2]] to <4 x i64>
; AVX256DQ-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
; AVX256DQ-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
; AVX256DQ-NEXT: ret void
;
; Load all eight source doubles.
%a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
%a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
%a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
%a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
%a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
%a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
%a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
%a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; Convert each double to a signed 64-bit integer.
%cvt0 = fptosi double %a0 to i64
%cvt1 = fptosi double %a1 to i64
%cvt2 = fptosi double %a2 to i64
%cvt3 = fptosi double %a3 to i64
%cvt4 = fptosi double %a4 to i64
%cvt5 = fptosi double %a5 to i64
%cvt6 = fptosi double %a6 to i64
%cvt7 = fptosi double %a7 to i64
; Store each result to the same index of @dst64.
store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
ret void
}
; Converts the eight doubles in @src64 to i32 with fptosi and stores them to
; the first eight slots of @dst32.
; Expected output per check prefix:
;  - SSE prefix - two <4 x double> to <4 x i32> halves (elements 0..3, 4..7).
;  - AVX prefix - one <8 x double> load, one vector fptosi to <8 x i32>, and
;    one store.
define void @fptosi_8f64_8i32() #0 {
; SSE-LABEL: @fptosi_8f64_8i32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
; SSE-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
; SSE-NEXT: [[TMP3:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i32>
; SSE-NEXT: [[TMP4:%.*]] = fptosi <4 x double> [[TMP2]] to <4 x i32>
; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @fptosi_8f64_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
; AVX-NEXT: [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i32>
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
; AVX-NEXT: ret void
;
; Load all eight source doubles.
%a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
%a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
%a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
%a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
%a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
%a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
%a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
%a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; Convert each double to a signed 32-bit integer.
%cvt0 = fptosi double %a0 to i32
%cvt1 = fptosi double %a1 to i32
%cvt2 = fptosi double %a2 to i32
%cvt3 = fptosi double %a3 to i32
%cvt4 = fptosi double %a4 to i32
%cvt5 = fptosi double %a5 to i32
%cvt6 = fptosi double %a6 to i32
%cvt7 = fptosi double %a7 to i32
; Store each result to the same index of @dst32 (only slots 0..7 are written).
store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
ret void
}
; Test input - eight scalar loads from @src64 (elements 0 through 7), each
; converted with fptosi from double to i16 and stored to elements 0 through 7
; of @dst16. The assertions below expect the whole sequence to be replaced by
; one <8 x double> load, one vector fptosi and one <8 x i16> store.
define void @fptosi_8f64_8i16() #0 {
; CHECK-LABEL: @fptosi_8f64_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
; CHECK-NEXT: [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i16>
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
; CHECK-NEXT: ret void
;
%a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
%a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
%a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
%a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
%a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
%a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
%a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
%a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
%cvt0 = fptosi double %a0 to i16
%cvt1 = fptosi double %a1 to i16
%cvt2 = fptosi double %a2 to i16
%cvt3 = fptosi double %a3 to i16
%cvt4 = fptosi double %a4 to i16
%cvt5 = fptosi double %a5 to i16
%cvt6 = fptosi double %a6 to i16
%cvt7 = fptosi double %a7 to i16
store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
ret void
}
; Test input - eight scalar loads from @src64 (elements 0 through 7), each
; converted with fptosi from double to i8 and stored to elements 0 through 7
; of @dst8. The assertions below expect the output to keep the same eight
; scalar loads, conversions and stores, i.e. no vector code is formed.
define void @fptosi_8f64_8i8() #0 {
; CHECK-LABEL: @fptosi_8f64_8i8(
; CHECK-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
; CHECK-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
; CHECK-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
; CHECK-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
; CHECK-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
; CHECK-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
; CHECK-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
; CHECK-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
; CHECK-NEXT: [[CVT0:%.*]] = fptosi double [[A0]] to i8
; CHECK-NEXT: [[CVT1:%.*]] = fptosi double [[A1]] to i8
; CHECK-NEXT: [[CVT2:%.*]] = fptosi double [[A2]] to i8
; CHECK-NEXT: [[CVT3:%.*]] = fptosi double [[A3]] to i8
; CHECK-NEXT: [[CVT4:%.*]] = fptosi double [[A4]] to i8
; CHECK-NEXT: [[CVT5:%.*]] = fptosi double [[A5]] to i8
; CHECK-NEXT: [[CVT6:%.*]] = fptosi double [[A6]] to i8
; CHECK-NEXT: [[CVT7:%.*]] = fptosi double [[A7]] to i8
; CHECK-NEXT: store i8 [[CVT0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
; CHECK-NEXT: store i8 [[CVT1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
; CHECK-NEXT: store i8 [[CVT2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
; CHECK-NEXT: store i8 [[CVT3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
; CHECK-NEXT: store i8 [[CVT4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
; CHECK-NEXT: store i8 [[CVT5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
; CHECK-NEXT: store i8 [[CVT6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
; CHECK-NEXT: store i8 [[CVT7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
; CHECK-NEXT: ret void
;
%a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
%a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
%a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
%a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
%a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
%a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
%a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
%a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
%cvt0 = fptosi double %a0 to i8
%cvt1 = fptosi double %a1 to i8
%cvt2 = fptosi double %a2 to i8
%cvt3 = fptosi double %a3 to i8
%cvt4 = fptosi double %a4 to i8
%cvt5 = fptosi double %a5 to i8
%cvt6 = fptosi double %a6 to i8
%cvt7 = fptosi double %a7 to i8
store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
ret void
}
;
; FPTOSI vXf32
;
; Test input - eight scalar loads from @src32 (elements 0 through 7), each
; converted with fptosi from float to i64 and stored to elements 0 through 7
; of @dst64. Expected output differs per check prefix
;   SSE and AVX256NODQ - the scalar loads, conversions and stores are kept
;   AVX512             - one <8 x float> load, one fptosi, one <8 x i64> store
;   AVX256DQ           - two <4 x float> halves (elements 0-3 and 4-7), each
;                        converted and stored as <4 x i64>
define void @fptosi_8f32_8i64() #0 {
; SSE-LABEL: @fptosi_8f32_8i64(
; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
; SSE-NEXT: [[CVT0:%.*]] = fptosi float [[A0]] to i64
; SSE-NEXT: [[CVT1:%.*]] = fptosi float [[A1]] to i64
; SSE-NEXT: [[CVT2:%.*]] = fptosi float [[A2]] to i64
; SSE-NEXT: [[CVT3:%.*]] = fptosi float [[A3]] to i64
; SSE-NEXT: [[CVT4:%.*]] = fptosi float [[A4]] to i64
; SSE-NEXT: [[CVT5:%.*]] = fptosi float [[A5]] to i64
; SSE-NEXT: [[CVT6:%.*]] = fptosi float [[A6]] to i64
; SSE-NEXT: [[CVT7:%.*]] = fptosi float [[A7]] to i64
; SSE-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
; SSE-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
; SSE-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
; SSE-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
; SSE-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @fptosi_8f32_8i64(
; AVX256NODQ-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
; AVX256NODQ-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
; AVX256NODQ-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
; AVX256NODQ-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
; AVX256NODQ-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptosi float [[A0]] to i64
; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptosi float [[A1]] to i64
; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptosi float [[A2]] to i64
; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptosi float [[A3]] to i64
; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptosi float [[A4]] to i64
; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptosi float [[A5]] to i64
; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptosi float [[A6]] to i64
; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptosi float [[A7]] to i64
; AVX256NODQ-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
; AVX256NODQ-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @fptosi_8f32_8i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
; AVX512-NEXT: [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i64>
; AVX512-NEXT: store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @fptosi_8f32_8i64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
; AVX256DQ-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
; AVX256DQ-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i64>
; AVX256DQ-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i64>
; AVX256DQ-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
; AVX256DQ-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
; AVX256DQ-NEXT: ret void
;
%a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
%a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
%a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
%a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
%a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
%a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
%a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
%a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
%cvt0 = fptosi float %a0 to i64
%cvt1 = fptosi float %a1 to i64
%cvt2 = fptosi float %a2 to i64
%cvt3 = fptosi float %a3 to i64
%cvt4 = fptosi float %a4 to i64
%cvt5 = fptosi float %a5 to i64
%cvt6 = fptosi float %a6 to i64
%cvt7 = fptosi float %a7 to i64
store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
ret void
}
; Test input - eight scalar loads from @src32 (elements 0 through 7), each
; converted with fptosi from float to i32 and stored to elements 0 through 7
; of @dst32. Expected output differs per check prefix
;   SSE - two <4 x float> halves (elements 0-3 and 4-7), each converted and
;         stored as <4 x i32>
;   AVX - one <8 x float> load, one fptosi, one <8 x i32> store
define void @fptosi_8f32_8i32() #0 {
; SSE-LABEL: @fptosi_8f32_8i32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
; SSE-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
; SSE-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32>
; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @fptosi_8f32_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i32>
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
; AVX-NEXT: ret void
;
%a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
%a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
%a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
%a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
%a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
%a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
%a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
%a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
%cvt0 = fptosi float %a0 to i32
%cvt1 = fptosi float %a1 to i32
%cvt2 = fptosi float %a2 to i32
%cvt3 = fptosi float %a3 to i32
%cvt4 = fptosi float %a4 to i32
%cvt5 = fptosi float %a5 to i32
%cvt6 = fptosi float %a6 to i32
%cvt7 = fptosi float %a7 to i32
store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
ret void
}
; Test input - eight scalar loads from @src32 (elements 0 through 7), each
; converted with fptosi from float to i16 and stored to elements 0 through 7
; of @dst16. The assertions below expect the whole sequence to be replaced by
; one <8 x float> load, one vector fptosi and one <8 x i16> store.
define void @fptosi_8f32_8i16() #0 {
; CHECK-LABEL: @fptosi_8f32_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
; CHECK-NEXT: [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i16>
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
; CHECK-NEXT: ret void
;
%a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
%a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
%a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
%a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
%a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
%a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
%a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
%a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
%cvt0 = fptosi float %a0 to i16
%cvt1 = fptosi float %a1 to i16
%cvt2 = fptosi float %a2 to i16
%cvt3 = fptosi float %a3 to i16
%cvt4 = fptosi float %a4 to i16
%cvt5 = fptosi float %a5 to i16
%cvt6 = fptosi float %a6 to i16
%cvt7 = fptosi float %a7 to i16
store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
ret void
}
; Test input - eight scalar loads from @src32 (elements 0 through 7), each
; converted with fptosi from float to i8 and stored to elements 0 through 7
; of @dst8. The assertions below expect the output to keep the same eight
; scalar loads, conversions and stores, i.e. no vector code is formed.
define void @fptosi_8f32_8i8() #0 {
; CHECK-LABEL: @fptosi_8f32_8i8(
; CHECK-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
; CHECK-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
; CHECK-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
; CHECK-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
; CHECK-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
; CHECK-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
; CHECK-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
; CHECK-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
; CHECK-NEXT: [[CVT0:%.*]] = fptosi float [[A0]] to i8
; CHECK-NEXT: [[CVT1:%.*]] = fptosi float [[A1]] to i8
; CHECK-NEXT: [[CVT2:%.*]] = fptosi float [[A2]] to i8
; CHECK-NEXT: [[CVT3:%.*]] = fptosi float [[A3]] to i8
; CHECK-NEXT: [[CVT4:%.*]] = fptosi float [[A4]] to i8
; CHECK-NEXT: [[CVT5:%.*]] = fptosi float [[A5]] to i8
; CHECK-NEXT: [[CVT6:%.*]] = fptosi float [[A6]] to i8
; CHECK-NEXT: [[CVT7:%.*]] = fptosi float [[A7]] to i8
; CHECK-NEXT: store i8 [[CVT0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
; CHECK-NEXT: store i8 [[CVT1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
; CHECK-NEXT: store i8 [[CVT2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
; CHECK-NEXT: store i8 [[CVT3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
; CHECK-NEXT: store i8 [[CVT4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
; CHECK-NEXT: store i8 [[CVT5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
; CHECK-NEXT: store i8 [[CVT6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
; CHECK-NEXT: store i8 [[CVT7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
; CHECK-NEXT: ret void
;
%a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
%a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
%a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
%a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
%a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
%a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
%a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
%a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
%cvt0 = fptosi float %a0 to i8
%cvt1 = fptosi float %a1 to i8
%cvt2 = fptosi float %a2 to i8
%cvt3 = fptosi float %a3 to i8
%cvt4 = fptosi float %a4 to i8
%cvt5 = fptosi float %a5 to i8
%cvt6 = fptosi float %a6 to i8
%cvt7 = fptosi float %a7 to i8
store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
ret void
}
;
; FPTOSI BUILDVECTOR
;
; Test input - converts the four double arguments to i32 with scalar fptosi
; and builds the <4 x i32> result with an insertelement chain whose initial
; vector operand is poison. The assertions below expect the output to keep
; the same four scalar conversions and the same insertelement chain.
define <4 x i32> @fptosi_4xf64_4i32(double %a0, double %a1, double %a2, double %a3) #0 {
; CHECK-LABEL: @fptosi_4xf64_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi double [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi double [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi double [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi double [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> poison, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
;
%cvt0 = fptosi double %a0 to i32
%cvt1 = fptosi double %a1 to i32
%cvt2 = fptosi double %a2 to i32
%cvt3 = fptosi double %a3 to i32
%res0 = insertelement <4 x i32> poison, i32 %cvt0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %cvt1, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %cvt2, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %cvt3, i32 3
ret <4 x i32> %res3
}
; Test input - converts the four float arguments to i32 with scalar fptosi
; and builds the <4 x i32> result with an insertelement chain whose initial
; vector operand is poison. The assertions below expect the output to keep
; the same four scalar conversions and the same insertelement chain.
define <4 x i32> @fptosi_4xf32_4i32(float %a0, float %a1, float %a2, float %a3) #0 {
; CHECK-LABEL: @fptosi_4xf32_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi float [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi float [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi float [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi float [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> poison, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
;
%cvt0 = fptosi float %a0 to i32
%cvt1 = fptosi float %a1 to i32
%cvt2 = fptosi float %a2 to i32
%cvt3 = fptosi float %a3 to i32
%res0 = insertelement <4 x i32> poison, i32 %cvt0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %cvt1, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %cvt2, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %cvt3, i32 3
ret <4 x i32> %res3
}
; Attribute group #0, referenced by the function definitions above; it marks
; them nounwind.
attributes #0 = { nounwind }

View File

@ -0,0 +1,433 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
;
; 128-bit vectors
;
; Input: pairwise sums of adjacent lanes, r0 = a[0] + a[1] and r1 = b[0] + b[1],
; packed into a <2 x double> built on a poison base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU, and the corei7-avx / core-avx2 / knl / skx runs: two
;     shufflevectors gather lanes <0,2> and <1,3> of (a, b), then one vector fadd.
;   - -mcpu=slm: the scalar extract / fadd / insert sequence is left unchanged.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
%b0 = extractelement <2 x double> %b, i32 0
%b1 = extractelement <2 x double> %b, i32 1
%r0 = fadd double %a0, %a1
%r1 = fadd double %b0, %b1
%r00 = insertelement <2 x double> poison, double %r0, i32 0
%r01 = insertelement <2 x double> %r00, double %r1, i32 1
ret <2 x double> %r01
}
; Input: pairwise float sums of adjacent lanes, ordered a-pairs then b-pairs
; (a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]), packed into a <4 x float> built
; on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,4,6> and odd lanes <1,3,5,7> of (a, b), followed
; by a single <4 x float> fadd.
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%r0 = fadd float %a0, %a1
%r1 = fadd float %a2, %a3
%r2 = fadd float %b0, %b1
%r3 = fadd float %b2, %b3
%r00 = insertelement <4 x float> poison, float %r0, i32 0
%r01 = insertelement <4 x float> %r00, float %r1, i32 1
%r02 = insertelement <4 x float> %r01, float %r2, i32 2
%r03 = insertelement <4 x float> %r02, float %r3, i32 3
ret <4 x float> %r03
}
; Input: pairwise i64 sums of adjacent lanes, r0 = a[0] + a[1] and
; r1 = b[0] + b[1], packed into a <2 x i64> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather lanes <0,2> and <1,3> of (a, b), followed by a single <2 x i64> add.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @test_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%a0 = extractelement <2 x i64> %a, i32 0
%a1 = extractelement <2 x i64> %a, i32 1
%b0 = extractelement <2 x i64> %b, i32 0
%b1 = extractelement <2 x i64> %b, i32 1
%r0 = add i64 %a0, %a1
%r1 = add i64 %b0, %b1
%r00 = insertelement <2 x i64> poison, i64 %r0, i32 0
%r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
ret <2 x i64> %r01
}
; Input: pairwise i32 sums of adjacent lanes, ordered a-pairs then b-pairs
; (a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]), packed into a <4 x i32> built
; on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,4,6> and odd lanes <1,3,5,7> of (a, b), followed
; by a single <4 x i32> add.
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <4 x i32> %b, i32 0
%b1 = extractelement <4 x i32> %b, i32 1
%b2 = extractelement <4 x i32> %b, i32 2
%b3 = extractelement <4 x i32> %b, i32 3
%r0 = add i32 %a0, %a1
%r1 = add i32 %a2, %a3
%r2 = add i32 %b0, %b1
%r3 = add i32 %b2, %b3
%r00 = insertelement <4 x i32> poison, i32 %r0, i32 0
%r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
%r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
%r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
ret <4 x i32> %r03
}
; Input: pairwise i16 sums of adjacent lanes; result lanes 0-3 come from the
; four adjacent pairs of %a and lanes 4-7 from the four adjacent pairs of %b,
; packed into a <8 x i16> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,...,14> and odd lanes <1,3,...,15> of (a, b),
; followed by a single <8 x i16> add.
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1
%a2 = extractelement <8 x i16> %a, i32 2
%a3 = extractelement <8 x i16> %a, i32 3
%a4 = extractelement <8 x i16> %a, i32 4
%a5 = extractelement <8 x i16> %a, i32 5
%a6 = extractelement <8 x i16> %a, i32 6
%a7 = extractelement <8 x i16> %a, i32 7
%b0 = extractelement <8 x i16> %b, i32 0
%b1 = extractelement <8 x i16> %b, i32 1
%b2 = extractelement <8 x i16> %b, i32 2
%b3 = extractelement <8 x i16> %b, i32 3
%b4 = extractelement <8 x i16> %b, i32 4
%b5 = extractelement <8 x i16> %b, i32 5
%b6 = extractelement <8 x i16> %b, i32 6
%b7 = extractelement <8 x i16> %b, i32 7
%r0 = add i16 %a0, %a1
%r1 = add i16 %a2, %a3
%r2 = add i16 %a4, %a5
%r3 = add i16 %a6, %a7
%r4 = add i16 %b0, %b1
%r5 = add i16 %b2, %b3
%r6 = add i16 %b4, %b5
%r7 = add i16 %b6, %b7
%r00 = insertelement <8 x i16> poison, i16 %r0, i32 0
%r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
%r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
%r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
%r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
%r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
%r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
%r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
ret <8 x i16> %r07
}
;
; 256-bit vectors
;
; Input: pairwise double sums of adjacent lanes, interleaving the sources per
; half: (a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3]), packed into a
; <4 x double> built on a poison base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU: two <2 x double> fadds (each fed by two shufflevectors of
;     (a, b)), joined into the <4 x double> result by a final shufflevector.
;   - -mcpu=slm: the scalar extract / fadd / insert sequence is left unchanged.
;   - corei7-avx / core-avx2 / knl / skx: shufflevectors <0,4,2,6> and
;     <1,5,3,7> of (a, b) feeding a single <4 x double> fadd.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x double> [[TMP3]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
%a2 = extractelement <4 x double> %a, i32 2
%a3 = extractelement <4 x double> %a, i32 3
%b0 = extractelement <4 x double> %b, i32 0
%b1 = extractelement <4 x double> %b, i32 1
%b2 = extractelement <4 x double> %b, i32 2
%b3 = extractelement <4 x double> %b, i32 3
%r0 = fadd double %a0, %a1
%r1 = fadd double %b0, %b1
%r2 = fadd double %a2, %a3
%r3 = fadd double %b2, %b3
%r00 = insertelement <4 x double> poison, double %r0, i32 0
%r01 = insertelement <4 x double> %r00, double %r1, i32 1
%r02 = insertelement <4 x double> %r01, double %r2, i32 2
%r03 = insertelement <4 x double> %r02, double %r3, i32 3
ret <4 x double> %r03
}
; Input: pairwise float sums of adjacent lanes, grouped per half of the
; sources: result lanes 0-1 use a[0..3], lanes 2-3 use b[0..3], lanes 4-5 use
; a[4..7] and lanes 6-7 use b[4..7]; packed into a <8 x float> built on a
; poison base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU and corei7-avx / core-avx2 / knl / skx: two shufflevectors
;     of (a, b) feeding a single <8 x float> fadd.
;   - -mcpu=slm: two <4 x float> fadds (lower-half lanes, then upper-half
;     lanes), joined into the <8 x float> result by a final shufflevector.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <8 x float> [[TMP3]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%b0 = extractelement <8 x float> %b, i32 0
%b1 = extractelement <8 x float> %b, i32 1
%b2 = extractelement <8 x float> %b, i32 2
%b3 = extractelement <8 x float> %b, i32 3
%b4 = extractelement <8 x float> %b, i32 4
%b5 = extractelement <8 x float> %b, i32 5
%b6 = extractelement <8 x float> %b, i32 6
%b7 = extractelement <8 x float> %b, i32 7
%r0 = fadd float %a0, %a1
%r1 = fadd float %a2, %a3
%r2 = fadd float %b0, %b1
%r3 = fadd float %b2, %b3
%r4 = fadd float %a4, %a5
%r5 = fadd float %a6, %a7
%r6 = fadd float %b4, %b5
%r7 = fadd float %b6, %b7
%r00 = insertelement <8 x float> poison, float %r0, i32 0
%r01 = insertelement <8 x float> %r00, float %r1, i32 1
%r02 = insertelement <8 x float> %r01, float %r2, i32 2
%r03 = insertelement <8 x float> %r02, float %r3, i32 3
%r04 = insertelement <8 x float> %r03, float %r4, i32 4
%r05 = insertelement <8 x float> %r04, float %r5, i32 5
%r06 = insertelement <8 x float> %r05, float %r6, i32 6
%r07 = insertelement <8 x float> %r06, float %r7, i32 7
ret <8 x float> %r07
}
; Input: pairwise i64 sums of adjacent lanes, interleaving the sources per
; half: (a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3]), packed into a <4 x i64>
; built on a poison base vector.
; Every run configuration shares the same expectation: shufflevectors
; <0,4,2,6> and <1,5,3,7> of (a, b) feeding a single <4 x i64> add.
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: @test_v4i64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
%a0 = extractelement <4 x i64> %a, i32 0
%a1 = extractelement <4 x i64> %a, i32 1
%a2 = extractelement <4 x i64> %a, i32 2
%a3 = extractelement <4 x i64> %a, i32 3
%b0 = extractelement <4 x i64> %b, i32 0
%b1 = extractelement <4 x i64> %b, i32 1
%b2 = extractelement <4 x i64> %b, i32 2
%b3 = extractelement <4 x i64> %b, i32 3
%r0 = add i64 %a0, %a1
%r1 = add i64 %b0, %b1
%r2 = add i64 %a2, %a3
%r3 = add i64 %b2, %b3
%r00 = insertelement <4 x i64> poison, i64 %r0, i32 0
%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
ret <4 x i64> %r03
}
; Input: pairwise i32 sums of adjacent lanes, grouped per half of the sources:
; result lanes 0-1 use a[0..3], lanes 2-3 use b[0..3], lanes 4-5 use a[4..7]
; and lanes 6-7 use b[4..7]; packed into a <8 x i32> built on a poison base
; vector.
; Every run configuration shares the same expectation: two shufflevectors of
; (a, b) feeding a single <8 x i32> add.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: @test_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%b0 = extractelement <8 x i32> %b, i32 0
%b1 = extractelement <8 x i32> %b, i32 1
%b2 = extractelement <8 x i32> %b, i32 2
%b3 = extractelement <8 x i32> %b, i32 3
%b4 = extractelement <8 x i32> %b, i32 4
%b5 = extractelement <8 x i32> %b, i32 5
%b6 = extractelement <8 x i32> %b, i32 6
%b7 = extractelement <8 x i32> %b, i32 7
%r0 = add i32 %a0, %a1
%r1 = add i32 %a2, %a3
%r2 = add i32 %b0, %b1
%r3 = add i32 %b2, %b3
%r4 = add i32 %a4, %a5
%r5 = add i32 %a6, %a7
%r6 = add i32 %b4, %b5
%r7 = add i32 %b6, %b7
%r00 = insertelement <8 x i32> poison, i32 %r0, i32 0
%r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
%r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
%r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
%r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
%r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
%r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
%r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
ret <8 x i32> %r07
}
; Input: pairwise i16 sums of adjacent lanes, grouped per half of the sources:
; result lanes 0-3 use a[0..7], lanes 4-7 use b[0..7], lanes 8-11 use a[8..15]
; and lanes 12-15 use b[8..15]; packed into a <16 x i16> built on a poison
; base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU: two <8 x i16> adds (lower-half lanes, then upper-half
;     lanes), joined into the <16 x i16> result by a final shufflevector.
;   - -mcpu=slm and corei7-avx / core-avx2 / knl / skx: two shufflevectors of
;     (a, b) feeding a single <16 x i16> add.
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <16 x i16> [[TMP3]]
;
%a0 = extractelement <16 x i16> %a, i32 0
%a1 = extractelement <16 x i16> %a, i32 1
%a2 = extractelement <16 x i16> %a, i32 2
%a3 = extractelement <16 x i16> %a, i32 3
%a4 = extractelement <16 x i16> %a, i32 4
%a5 = extractelement <16 x i16> %a, i32 5
%a6 = extractelement <16 x i16> %a, i32 6
%a7 = extractelement <16 x i16> %a, i32 7
%a8 = extractelement <16 x i16> %a, i32 8
%a9 = extractelement <16 x i16> %a, i32 9
%a10 = extractelement <16 x i16> %a, i32 10
%a11 = extractelement <16 x i16> %a, i32 11
%a12 = extractelement <16 x i16> %a, i32 12
%a13 = extractelement <16 x i16> %a, i32 13
%a14 = extractelement <16 x i16> %a, i32 14
%a15 = extractelement <16 x i16> %a, i32 15
%b0 = extractelement <16 x i16> %b, i32 0
%b1 = extractelement <16 x i16> %b, i32 1
%b2 = extractelement <16 x i16> %b, i32 2
%b3 = extractelement <16 x i16> %b, i32 3
%b4 = extractelement <16 x i16> %b, i32 4
%b5 = extractelement <16 x i16> %b, i32 5
%b6 = extractelement <16 x i16> %b, i32 6
%b7 = extractelement <16 x i16> %b, i32 7
%b8 = extractelement <16 x i16> %b, i32 8
%b9 = extractelement <16 x i16> %b, i32 9
%b10 = extractelement <16 x i16> %b, i32 10
%b11 = extractelement <16 x i16> %b, i32 11
%b12 = extractelement <16 x i16> %b, i32 12
%b13 = extractelement <16 x i16> %b, i32 13
%b14 = extractelement <16 x i16> %b, i32 14
%b15 = extractelement <16 x i16> %b, i32 15
%r0 = add i16 %a0 , %a1
%r1 = add i16 %a2 , %a3
%r2 = add i16 %a4 , %a5
%r3 = add i16 %a6 , %a7
%r4 = add i16 %b0 , %b1
%r5 = add i16 %b2 , %b3
%r6 = add i16 %b4 , %b5
%r7 = add i16 %b6 , %b7
%r8 = add i16 %a8 , %a9
%r9 = add i16 %a10, %a11
%r10 = add i16 %a12, %a13
%r11 = add i16 %a14, %a15
%r12 = add i16 %b8 , %b9
%r13 = add i16 %b10, %b11
%r14 = add i16 %b12, %b13
%r15 = add i16 %b14, %b15
%rv0 = insertelement <16 x i16> poison, i16 %r0 , i32 0
%rv1 = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
%rv2 = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
%rv3 = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
%rv4 = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
%rv5 = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
%rv6 = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
%rv7 = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
%rv8 = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
%rv9 = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
%rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
%rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
%rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
%rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
%rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
%rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
ret <16 x i16> %rv15
}

View File

@ -0,0 +1,433 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
;
; 128-bit vectors
;
; Input: pairwise differences of adjacent lanes, r0 = a[0] - a[1] and
; r1 = b[0] - b[1], packed into a <2 x double> built on a poison base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU, and the corei7-avx / core-avx2 / knl / skx runs: two
;     shufflevectors gather lanes <0,2> and <1,3> of (a, b), then one vector fsub.
;   - -mcpu=slm: the scalar extract / fsub / insert sequence is left unchanged.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
%b0 = extractelement <2 x double> %b, i32 0
%b1 = extractelement <2 x double> %b, i32 1
%r0 = fsub double %a0, %a1
%r1 = fsub double %b0, %b1
%r00 = insertelement <2 x double> poison, double %r0, i32 0
%r01 = insertelement <2 x double> %r00, double %r1, i32 1
ret <2 x double> %r01
}
; Input: pairwise float differences of adjacent lanes, ordered a-pairs then
; b-pairs (a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3]), packed into a
; <4 x float> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,4,6> and odd lanes <1,3,5,7> of (a, b), followed
; by a single <4 x float> fsub (even lanes minus odd lanes).
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%r0 = fsub float %a0, %a1
%r1 = fsub float %a2, %a3
%r2 = fsub float %b0, %b1
%r3 = fsub float %b2, %b3
%r00 = insertelement <4 x float> poison, float %r0, i32 0
%r01 = insertelement <4 x float> %r00, float %r1, i32 1
%r02 = insertelement <4 x float> %r01, float %r2, i32 2
%r03 = insertelement <4 x float> %r02, float %r3, i32 3
ret <4 x float> %r03
}
; Input: pairwise i64 differences of adjacent lanes, r0 = a[0] - a[1] and
; r1 = b[0] - b[1], packed into a <2 x i64> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather lanes <0,2> and <1,3> of (a, b), followed by a single <2 x i64> sub.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @test_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%a0 = extractelement <2 x i64> %a, i32 0
%a1 = extractelement <2 x i64> %a, i32 1
%b0 = extractelement <2 x i64> %b, i32 0
%b1 = extractelement <2 x i64> %b, i32 1
%r0 = sub i64 %a0, %a1
%r1 = sub i64 %b0, %b1
%r00 = insertelement <2 x i64> poison, i64 %r0, i32 0
%r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
ret <2 x i64> %r01
}
; Input: pairwise i32 differences of adjacent lanes, ordered a-pairs then
; b-pairs (a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3]), packed into a
; <4 x i32> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,4,6> and odd lanes <1,3,5,7> of (a, b), followed
; by a single <4 x i32> sub (even lanes minus odd lanes).
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%a0 = extractelement <4 x i32> %a, i32 0
%a1 = extractelement <4 x i32> %a, i32 1
%a2 = extractelement <4 x i32> %a, i32 2
%a3 = extractelement <4 x i32> %a, i32 3
%b0 = extractelement <4 x i32> %b, i32 0
%b1 = extractelement <4 x i32> %b, i32 1
%b2 = extractelement <4 x i32> %b, i32 2
%b3 = extractelement <4 x i32> %b, i32 3
%r0 = sub i32 %a0, %a1
%r1 = sub i32 %a2, %a3
%r2 = sub i32 %b0, %b1
%r3 = sub i32 %b2, %b3
%r00 = insertelement <4 x i32> poison, i32 %r0, i32 0
%r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
%r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
%r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
ret <4 x i32> %r03
}
; Input: pairwise i16 differences of adjacent lanes; result lanes 0-3 come
; from the four adjacent pairs of %a and lanes 4-7 from the four adjacent
; pairs of %b, packed into a <8 x i16> built on a poison base vector.
; Every run configuration shares the same expectation: two shufflevectors
; gather the even lanes <0,2,...,14> and odd lanes <1,3,...,15> of (a, b),
; followed by a single <8 x i16> sub (even lanes minus odd lanes).
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1
%a2 = extractelement <8 x i16> %a, i32 2
%a3 = extractelement <8 x i16> %a, i32 3
%a4 = extractelement <8 x i16> %a, i32 4
%a5 = extractelement <8 x i16> %a, i32 5
%a6 = extractelement <8 x i16> %a, i32 6
%a7 = extractelement <8 x i16> %a, i32 7
%b0 = extractelement <8 x i16> %b, i32 0
%b1 = extractelement <8 x i16> %b, i32 1
%b2 = extractelement <8 x i16> %b, i32 2
%b3 = extractelement <8 x i16> %b, i32 3
%b4 = extractelement <8 x i16> %b, i32 4
%b5 = extractelement <8 x i16> %b, i32 5
%b6 = extractelement <8 x i16> %b, i32 6
%b7 = extractelement <8 x i16> %b, i32 7
%r0 = sub i16 %a0, %a1
%r1 = sub i16 %a2, %a3
%r2 = sub i16 %a4, %a5
%r3 = sub i16 %a6, %a7
%r4 = sub i16 %b0, %b1
%r5 = sub i16 %b2, %b3
%r6 = sub i16 %b4, %b5
%r7 = sub i16 %b6, %b7
%r00 = insertelement <8 x i16> poison, i16 %r0, i32 0
%r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
%r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
%r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
%r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
%r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
%r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
%r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
ret <8 x i16> %r07
}
;
; 256-bit vectors
;
; Input: pairwise double differences of adjacent lanes, interleaving the
; sources per half: (a[0]-a[1], b[0]-b[1], a[2]-a[3], b[2]-b[3]), packed into
; a <4 x double> built on a poison base vector.
; Expected output per run configuration (see the run lines at the top of the file):
;   - default CPU: two <2 x double> fsubs (each fed by two shufflevectors of
;     (a, b)), joined into the <4 x double> result by a final shufflevector.
;   - -mcpu=slm: the scalar extract / fsub / insert sequence is left unchanged.
;   - corei7-avx / core-avx2 / knl / skx: shufflevectors <0,4,2,6> and
;     <1,5,3,7> of (a, b) feeding a single <4 x double> fsub.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x double> [[TMP3]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
%a2 = extractelement <4 x double> %a, i32 2
%a3 = extractelement <4 x double> %a, i32 3
%b0 = extractelement <4 x double> %b, i32 0
%b1 = extractelement <4 x double> %b, i32 1
%b2 = extractelement <4 x double> %b, i32 2
%b3 = extractelement <4 x double> %b, i32 3
%r0 = fsub double %a0, %a1
%r1 = fsub double %b0, %b1
%r2 = fsub double %a2, %a3
%r3 = fsub double %b2, %b3
%r00 = insertelement <4 x double> poison, double %r0, i32 0
%r01 = insertelement <4 x double> %r00, double %r1, i32 1
%r02 = insertelement <4 x double> %r01, double %r2, i32 2
%r03 = insertelement <4 x double> %r02, double %r3, i32 3
ret <4 x double> %r03
}
; Pairwise (horizontal-style) subtract on <8 x float>, written as scalar code:
; every lane of %a and %b is extracted, adjacent lanes are subtracted, and the
; eight differences are inserted into a vector that starts from poison.
; Result lane order: a0-a1, a2-a3, b0-b1, b2-b3, a4-a5, a6-a7, b4-b5, b6-b7.
; The SSE and AVX assertions expect one <8 x float> fsub of two shuffles of
; %a and %b; the SLM assertions expect two <4 x float> fsubs whose results are
; joined by a final shuffle.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <8 x float> [[TMP3]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6
%a7 = extractelement <8 x float> %a, i32 7
%b0 = extractelement <8 x float> %b, i32 0
%b1 = extractelement <8 x float> %b, i32 1
%b2 = extractelement <8 x float> %b, i32 2
%b3 = extractelement <8 x float> %b, i32 3
%b4 = extractelement <8 x float> %b, i32 4
%b5 = extractelement <8 x float> %b, i32 5
%b6 = extractelement <8 x float> %b, i32 6
%b7 = extractelement <8 x float> %b, i32 7
%r0 = fsub float %a0, %a1
%r1 = fsub float %a2, %a3
%r2 = fsub float %b0, %b1
%r3 = fsub float %b2, %b3
%r4 = fsub float %a4, %a5
%r5 = fsub float %a6, %a7
%r6 = fsub float %b4, %b5
%r7 = fsub float %b6, %b7
%r00 = insertelement <8 x float> poison, float %r0, i32 0
%r01 = insertelement <8 x float> %r00, float %r1, i32 1
%r02 = insertelement <8 x float> %r01, float %r2, i32 2
%r03 = insertelement <8 x float> %r02, float %r3, i32 3
%r04 = insertelement <8 x float> %r03, float %r4, i32 4
%r05 = insertelement <8 x float> %r04, float %r5, i32 5
%r06 = insertelement <8 x float> %r05, float %r6, i32 6
%r07 = insertelement <8 x float> %r06, float %r7, i32 7
ret <8 x float> %r07
}
; Pairwise subtract of adjacent lanes on <4 x i64>, written as scalar code and
; rebuilt with an insertelement chain that starts from poison.
; Result lane order: a0-a1, b0-b1, a2-a3, b2-b3.
; The assertions expect a single <4 x i64> sub whose operands are two shuffles
; of %a and %b (even lanes minus odd lanes).
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: @test_v4i64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
%a0 = extractelement <4 x i64> %a, i32 0
%a1 = extractelement <4 x i64> %a, i32 1
%a2 = extractelement <4 x i64> %a, i32 2
%a3 = extractelement <4 x i64> %a, i32 3
%b0 = extractelement <4 x i64> %b, i32 0
%b1 = extractelement <4 x i64> %b, i32 1
%b2 = extractelement <4 x i64> %b, i32 2
%b3 = extractelement <4 x i64> %b, i32 3
%r0 = sub i64 %a0, %a1
%r1 = sub i64 %b0, %b1
%r2 = sub i64 %a2, %a3
%r3 = sub i64 %b2, %b3
%r00 = insertelement <4 x i64> poison, i64 %r0, i32 0
%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
ret <4 x i64> %r03
}
; Pairwise subtract of adjacent lanes on <8 x i32>, written as scalar code and
; rebuilt with an insertelement chain that starts from poison.
; Result lane order: a0-a1, a2-a3, b0-b1, b2-b3, a4-a5, a6-a7, b4-b5, b6-b7.
; The assertions (shared by every run configuration) expect a single
; <8 x i32> sub whose operands are two shuffles of %a and %b.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: @test_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6
%a7 = extractelement <8 x i32> %a, i32 7
%b0 = extractelement <8 x i32> %b, i32 0
%b1 = extractelement <8 x i32> %b, i32 1
%b2 = extractelement <8 x i32> %b, i32 2
%b3 = extractelement <8 x i32> %b, i32 3
%b4 = extractelement <8 x i32> %b, i32 4
%b5 = extractelement <8 x i32> %b, i32 5
%b6 = extractelement <8 x i32> %b, i32 6
%b7 = extractelement <8 x i32> %b, i32 7
%r0 = sub i32 %a0, %a1
%r1 = sub i32 %a2, %a3
%r2 = sub i32 %b0, %b1
%r3 = sub i32 %b2, %b3
%r4 = sub i32 %a4, %a5
%r5 = sub i32 %a6, %a7
%r6 = sub i32 %b4, %b5
%r7 = sub i32 %b6, %b7
%r00 = insertelement <8 x i32> poison, i32 %r0, i32 0
%r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
%r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
%r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
%r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
%r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
%r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
%r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
ret <8 x i32> %r07
}
; Pairwise subtract of adjacent lanes on <16 x i16>, written as scalar code and
; rebuilt with an insertelement chain that starts from poison.
; Result lanes 0-3 come from %a lanes 0-7, lanes 4-7 from %b lanes 0-7,
; lanes 8-11 from %a lanes 8-15 and lanes 12-15 from %b lanes 8-15.
; The SSE assertions expect two <8 x i16> subs joined by a final shuffle; the
; SLM and AVX assertions expect a single <16 x i16> sub of two shuffles.
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT: [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT: [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <16 x i16> [[TMP3]]
;
%a0 = extractelement <16 x i16> %a, i32 0
%a1 = extractelement <16 x i16> %a, i32 1
%a2 = extractelement <16 x i16> %a, i32 2
%a3 = extractelement <16 x i16> %a, i32 3
%a4 = extractelement <16 x i16> %a, i32 4
%a5 = extractelement <16 x i16> %a, i32 5
%a6 = extractelement <16 x i16> %a, i32 6
%a7 = extractelement <16 x i16> %a, i32 7
%a8 = extractelement <16 x i16> %a, i32 8
%a9 = extractelement <16 x i16> %a, i32 9
%a10 = extractelement <16 x i16> %a, i32 10
%a11 = extractelement <16 x i16> %a, i32 11
%a12 = extractelement <16 x i16> %a, i32 12
%a13 = extractelement <16 x i16> %a, i32 13
%a14 = extractelement <16 x i16> %a, i32 14
%a15 = extractelement <16 x i16> %a, i32 15
%b0 = extractelement <16 x i16> %b, i32 0
%b1 = extractelement <16 x i16> %b, i32 1
%b2 = extractelement <16 x i16> %b, i32 2
%b3 = extractelement <16 x i16> %b, i32 3
%b4 = extractelement <16 x i16> %b, i32 4
%b5 = extractelement <16 x i16> %b, i32 5
%b6 = extractelement <16 x i16> %b, i32 6
%b7 = extractelement <16 x i16> %b, i32 7
%b8 = extractelement <16 x i16> %b, i32 8
%b9 = extractelement <16 x i16> %b, i32 9
%b10 = extractelement <16 x i16> %b, i32 10
%b11 = extractelement <16 x i16> %b, i32 11
%b12 = extractelement <16 x i16> %b, i32 12
%b13 = extractelement <16 x i16> %b, i32 13
%b14 = extractelement <16 x i16> %b, i32 14
%b15 = extractelement <16 x i16> %b, i32 15
%r0 = sub i16 %a0 , %a1
%r1 = sub i16 %a2 , %a3
%r2 = sub i16 %a4 , %a5
%r3 = sub i16 %a6 , %a7
%r4 = sub i16 %b0 , %b1
%r5 = sub i16 %b2 , %b3
%r6 = sub i16 %b4 , %b5
%r7 = sub i16 %b6 , %b7
%r8 = sub i16 %a8 , %a9
%r9 = sub i16 %a10, %a11
%r10 = sub i16 %a12, %a13
%r11 = sub i16 %a14, %a15
%r12 = sub i16 %b8 , %b9
%r13 = sub i16 %b10, %b11
%r14 = sub i16 %b12, %b13
%r15 = sub i16 %b14, %b15
%rv0 = insertelement <16 x i16> poison, i16 %r0 , i32 0
%rv1 = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
%rv2 = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
%rv3 = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
%rv4 = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
%rv5 = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
%rv6 = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
%rv7 = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
%rv8 = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
%rv9 = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
%rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
%rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
%rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
%rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
%rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
%rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
ret <16 x i16> %rv15
}

View File

@ -0,0 +1,540 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
; RUN: opt -S -slp-vectorizer -slp-threshold=0 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Baseline case: a lane-wise select written as scalar code. For each lane i the
; result is %a[i] when %c[i] is non-zero and %b[i] otherwise; the four scalars
; are collected with an insertelement chain that starts from poison.
; The assertions expect one vector icmp ne plus one vector select on the
; original operands, followed by extractelement/insertelement pairs that
; rebuild the returned vector.
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%cmp2 = icmp ne i32 %c2, 0
%cmp3 = icmp ne i32 %c3, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%s2 = select i1 %cmp2, float %a2, float %b2
%s3 = select i1 %cmp3, float %a3, float %b3
%ra = insertelement <4 x float> poison, float %s0, i32 0
%rb = insertelement <4 x float> %ra, float %s1, i32 1
%rc = insertelement <4 x float> %rb, float %s2, i32 2
%rd = insertelement <4 x float> %rc, float %s3, i32 3
ret <4 x float> %rd
}
; Intrinsic called by @simple_select_eph; its only argument there is the
; comparison computed from the rebuilt vector.
declare void @llvm.assume(i1) nounwind
; This entire tree is ephemeral, don't vectorize any of it.
; The rebuilt vector %rd is never returned: its lanes are only summed and
; compared to produce %qi, which feeds @llvm.assume, and the function returns
; undef. The assertions expect the scalar instructions to come out unchanged.
define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_eph(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0
; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
; CHECK-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]]
; CHECK-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]]
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[S0]], i32 0
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
; CHECK-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
; CHECK-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
; CHECK-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
; CHECK-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
; CHECK-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
; CHECK-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
; CHECK-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
; CHECK-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
; CHECK-NEXT: call void @llvm.assume(i1 [[QI]])
; CHECK-NEXT: ret <4 x float> undef
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%cmp2 = icmp ne i32 %c2, 0
%cmp3 = icmp ne i32 %c3, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%s2 = select i1 %cmp2, float %a2, float %b2
%s3 = select i1 %cmp3, float %a3, float %b3
%ra = insertelement <4 x float> poison, float %s0, i32 0
%rb = insertelement <4 x float> %ra, float %s1, i32 1
%rc = insertelement <4 x float> %rb, float %s2, i32 2
%rd = insertelement <4 x float> %rc, float %s3, i32 3
%q0 = extractelement <4 x float> %rd, i32 0
%q1 = extractelement <4 x float> %rd, i32 1
%q2 = extractelement <4 x float> %rd, i32 2
%q3 = extractelement <4 x float> %rd, i32 3
%q4 = fadd float %q0, %q1
%q5 = fadd float %q2, %q3
%q6 = fadd float %q4, %q5
%qi = fcmp olt float %q6, %q5
call void @llvm.assume(i1 %qi)
ret <4 x float> undef
}
; Insert in an order different from the vector indices to make sure it
; doesn't matter.
; Here the select for lane 0 is inserted at index 2 and the select for lane 2
; at index 0; lanes 1 and 3 keep their own index. The assertions expect all
; three operands to be permuted with mask <2, 1, 0, 3> ahead of a single vector
; icmp ne and vector select.
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_insert_out_of_order(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[SHUFFLE1]], <4 x float> [[SHUFFLE2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%cmp2 = icmp ne i32 %c2, 0
%cmp3 = icmp ne i32 %c3, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%s2 = select i1 %cmp2, float %a2, float %b2
%s3 = select i1 %cmp3, float %a3, float %b3
%ra = insertelement <4 x float> poison, float %s0, i32 2
%rb = insertelement <4 x float> %ra, float %s1, i32 1
%rc = insertelement <4 x float> %rb, float %s2, i32 0
%rd = insertelement <4 x float> %rc, float %s3, i32 3
ret <4 x float> %rd
}
; External sinks used to give constructed values an extra user;
; @simple_select_users passes its final vector to @v4f32_user.
declare void @v4f32_user(<4 x float>) #0
declare void @f32_user(float) #0
; Multiple users of the final constructed vector
; The last insertelement, %rd, is both passed to @v4f32_user and returned.
; The assertions expect the same vector icmp ne / select as the single-user
; case, with the call to @v4f32_user kept on the rebuilt vector.
define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_users(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: call void @v4f32_user(<4 x float> [[RD]]) [[ATTR0:#.*]]
; CHECK-NEXT: ret <4 x float> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%cmp2 = icmp ne i32 %c2, 0
%cmp3 = icmp ne i32 %c3, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%s2 = select i1 %cmp2, float %a2, float %b2
%s3 = select i1 %cmp3, float %a3, float %b3
%ra = insertelement <4 x float> poison, float %s0, i32 0
%rb = insertelement <4 x float> %ra, float %s1, i32 1
%rc = insertelement <4 x float> %rb, float %s2, i32 2
%rd = insertelement <4 x float> %rc, float %s3, i32 3
call void @v4f32_user(<4 x float> %rd) #0
ret <4 x float> %rd
}
; Unused insertelement
; The chain is deliberately broken: %rc restarts from poison instead of
; continuing from %rb, so %ra/%rb (lanes 0 and 1) have no users and only
; lanes 2 and 3 reach the return value. The assertions expect two separate
; <2 x ...> icmp ne / select pairs, one per half, each built from scalars.
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> poison, float [[TMP19]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%b2 = extractelement <4 x float> %b, i32 2
%b3 = extractelement <4 x float> %b, i32 3
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%cmp2 = icmp ne i32 %c2, 0
%cmp3 = icmp ne i32 %c3, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%s2 = select i1 %cmp2, float %a2, float %b2
%s3 = select i1 %cmp3, float %a3, float %b3
%ra = insertelement <4 x float> poison, float %s0, i32 0
%rb = insertelement <4 x float> %ra, float %s1, i32 1
%rc = insertelement <4 x float> poison, float %s2, i32 2
%rd = insertelement <4 x float> %rc, float %s3, i32 3
ret <4 x float> %rd
}
; Make sure an infinite loop doesn't happen, which I ran into when trying
; to do this backwards.
; The function only extracts every lane of %c and reinserts it at the same
; index; the assertions expect the instructions to come out unchanged.
define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
; CHECK-LABEL: @reconstruct(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3
%ra = insertelement <4 x i32> poison, i32 %c0, i32 0
%rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
%rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
%rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
ret <4 x i32> %rd
}
; Two-lane variant of the lane-wise select: for each lane i the result is
; %a[i] when %c[i] is non-zero and %b[i] otherwise, collected with an
; insertelement chain that starts from poison.
; The assertions expect one <2 x i32> icmp ne and one <2 x float> select on the
; original operands, followed by the rebuilt two-element vector.
define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_v2(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: ret <2 x float> [[RB]]
;
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
%a0 = extractelement <2 x float> %a, i32 0
%a1 = extractelement <2 x float> %a, i32 1
%b0 = extractelement <2 x float> %b, i32 0
%b1 = extractelement <2 x float> %b, i32 1
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
%ra = insertelement <2 x float> poison, float %s0, i32 0
%rb = insertelement <2 x float> %ra, float %s1, i32 1
ret <2 x float> %rb
}
; Make sure when we construct partial vectors, we don't keep
; re-visiting the insertelement chains starting with poison
; (low cost threshold needed to force this to happen)
; The input already contains <2 x ...> vectors built from lanes 0 and 1 of the
; arguments plus a vector icmp ne / select on them, and only lanes 0 and 1 of
; the <4 x float> result are filled in. The assertions expect the same
; instruction sequence to come out, with the unnamed values captured as TMPn.
define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_partial_vector(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1
; CHECK-NEXT: ret <4 x float> [[RB]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
%1 = insertelement <2 x i32> poison, i32 %c0, i32 0
%2 = insertelement <2 x i32> %1, i32 %c1, i32 1
%3 = icmp ne <2 x i32> %2, zeroinitializer
%4 = insertelement <2 x float> poison, float %a0, i32 0
%5 = insertelement <2 x float> %4, float %a1, i32 1
%6 = insertelement <2 x float> poison, float %b0, i32 0
%7 = insertelement <2 x float> %6, float %b1, i32 1
%8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
%9 = extractelement <2 x float> %8, i32 0
%ra = insertelement <4 x float> poison, float %9, i32 0
%10 = extractelement <2 x float> %8, i32 1
%rb = insertelement <4 x float> %ra, float %10, i32 1
ret <4 x float> %rb
}
; Make sure that vectorization happens even if insertelement operations
; must be rescheduled. The case here is from compiling Julia.
; Each lane's extract/fadd/insertelement is emitted together, so every
; insertelement sits between the scalar work of neighbouring lanes. The
; assertions expect a single <4 x float> fadd of %a and %b followed by the
; rebuilt result vector.
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reschedule_extract(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[V3]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
%c0 = fadd float %a0, %b0
%v0 = insertelement <4 x float> poison, float %c0, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%b1 = extractelement <4 x float> %b, i32 1
%c1 = fadd float %a1, %b1
%v1 = insertelement <4 x float> %v0, float %c1, i32 1
%a2 = extractelement <4 x float> %a, i32 2
%b2 = extractelement <4 x float> %b, i32 2
%c2 = fadd float %a2, %b2
%v2 = insertelement <4 x float> %v1, float %c2, i32 2
%a3 = extractelement <4 x float> %a, i32 3
%b3 = extractelement <4 x float> %b, i32 3
%c3 = fadd float %a3, %b3
%v3 = insertelement <4 x float> %v2, float %c3, i32 3
ret <4 x float> %v3
}
; Check that cost model for vectorization takes credit for
; instructions that are erased.
;
; Input shape: all eight scalar extractelements and the four scalar fadds
; come first; the four insertelements that build the result are grouped at
; the end.
; Expected output: a single <4 x float> fadd of %a and %b, followed by
; extractelement/insertelement pairs that rebuild the returned vector.
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @take_credit(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[V3]]
;
; Per-lane scalar adds: %cN = %a[N] + %b[N].
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
%c0 = fadd float %a0, %b0
%a1 = extractelement <4 x float> %a, i32 1
%b1 = extractelement <4 x float> %b, i32 1
%c1 = fadd float %a1, %b1
%a2 = extractelement <4 x float> %a, i32 2
%b2 = extractelement <4 x float> %b, i32 2
%c2 = fadd float %a2, %b2
%a3 = extractelement <4 x float> %a, i32 3
%b3 = extractelement <4 x float> %b, i32 3
%c3 = fadd float %a3, %b3
; Build the result vector from the four sums, starting from poison.
%v0 = insertelement <4 x float> poison, float %c0, i32 0
%v1 = insertelement <4 x float> %v0, float %c1, i32 1
%v2 = insertelement <4 x float> %v1, float %c2, i32 2
%v3 = insertelement <4 x float> %v2, float %c3, i32 3
ret <4 x float> %v3
}
; Make sure we handle multiple trees that feed one build vector correctly.
;
; Input shape: four scalar fadd-by-constant results each pass through an
; fmul by 1.0 and are inserted in descending lane order (lane 3 first,
; lane 0 last), so %w ends up in lane 3 and %z in lane 0.
; Expected output: the scalar arguments are gathered into a <4 x double>
; (z, y, x, w in lanes 0..3, on an undef base), then a vector fadd with
; <3.0, 2.0, 1.0, 0.0> and a vector fmul with all-ones, followed by
; extractelement/insertelement pairs in the same lane order as the input.
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> poison, double [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP9]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
; CHECK-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP10]], i32 0
; CHECK-NEXT: ret <4 x double> [[I4]]
;
; First level: add a distinct constant to each scalar argument.
%t0 = fadd double %w , 0.000000e+00
%t1 = fadd double %x , 1.000000e+00
%t2 = fadd double %y , 2.000000e+00
%t3 = fadd double %z , 3.000000e+00
; Second level: each fmul is immediately followed by its insertelement;
; the lanes are filled from index 3 down to index 0.
%t4 = fmul double %t0, 1.000000e+00
%i1 = insertelement <4 x double> poison, double %t4, i32 3
%t5 = fmul double %t1, 1.000000e+00
%i2 = insertelement <4 x double> %i1, double %t5, i32 2
%t6 = fmul double %t2, 1.000000e+00
%i3 = insertelement <4 x double> %i2, double %t6, i32 1
%t7 = fmul double %t3, 1.000000e+00
%i4 = insertelement <4 x double> %i3, double %t7, i32 0
ret <4 x double> %i4
}
; Eight-lane element-wise float add written out as scalar code: every lane
; of %a and %b is extracted, added, and the eight sums are inserted into a
; fresh <8 x float> that starts from poison.
; Expected output: a single <8 x float> fadd of %a and %b, followed by
; extractelement/insertelement pairs that rebuild the returned vector.
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: @_vadd256(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP5]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
; CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
; CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP7]], i32 5
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
; CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP8]], i32 6
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
; CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP9]], i32 7
; CHECK-NEXT: ret <8 x float> [[VECINIT7_I]]
;
; Scalar adds, one per lane (lanes 0 through 7).
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
%add = fadd float %vecext, %vecext1
%vecext2 = extractelement <8 x float> %a, i32 1
%vecext3 = extractelement <8 x float> %b, i32 1
%add4 = fadd float %vecext2, %vecext3
%vecext5 = extractelement <8 x float> %a, i32 2
%vecext6 = extractelement <8 x float> %b, i32 2
%add7 = fadd float %vecext5, %vecext6
%vecext8 = extractelement <8 x float> %a, i32 3
%vecext9 = extractelement <8 x float> %b, i32 3
%add10 = fadd float %vecext8, %vecext9
%vecext11 = extractelement <8 x float> %a, i32 4
%vecext12 = extractelement <8 x float> %b, i32 4
%add13 = fadd float %vecext11, %vecext12
%vecext14 = extractelement <8 x float> %a, i32 5
%vecext15 = extractelement <8 x float> %b, i32 5
%add16 = fadd float %vecext14, %vecext15
%vecext17 = extractelement <8 x float> %a, i32 6
%vecext18 = extractelement <8 x float> %b, i32 6
%add19 = fadd float %vecext17, %vecext18
%vecext20 = extractelement <8 x float> %a, i32 7
%vecext21 = extractelement <8 x float> %b, i32 7
%add22 = fadd float %vecext20, %vecext21
; Build vector: insert the eight sums in ascending lane order.
%vecinit.i = insertelement <8 x float> poison, float %add, i32 0
%vecinit1.i = insertelement <8 x float> %vecinit.i, float %add4, i32 1
%vecinit2.i = insertelement <8 x float> %vecinit1.i, float %add7, i32 2
%vecinit3.i = insertelement <8 x float> %vecinit2.i, float %add10, i32 3
%vecinit4.i = insertelement <8 x float> %vecinit3.i, float %add13, i32 4
%vecinit5.i = insertelement <8 x float> %vecinit4.i, float %add16, i32 5
%vecinit6.i = insertelement <8 x float> %vecinit5.i, float %add19, i32 6
%vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
ret <8 x float> %vecinit7.i
}
; Attribute group referenced by @_vadd256 above.
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -0,0 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=haswell | FileCheck %s
; C source this function corresponds to:
;unsigned load_le32(unsigned char *data) {
; unsigned le32 = (data[0]<<0) | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
; return le32;
;}
;
; Four single-byte loads are zero-extended, shifted by 0/8/16/24 bits and
; or-ed together into one i32. The expected output below is the same scalar
; load/zext/shl/or chain as the input, i.e. the byte loads stay scalar.
define i32 @_Z9load_le32Ph(i8* nocapture readonly %data) {
; CHECK-LABEL: @_Z9load_le32Ph(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[DATA:%.*]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 8
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL3]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 16
; CHECK-NEXT: [[OR7:%.*]] = or i32 [[OR]], [[SHL6]]
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 3
; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX8]], align 1
; CHECK-NEXT: [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[SHL10:%.*]] = shl nuw i32 [[CONV9]], 24
; CHECK-NEXT: [[OR11:%.*]] = or i32 [[OR7]], [[SHL10]]
; CHECK-NEXT: ret i32 [[OR11]]
;
entry:
; Byte 0: bits 0..7 (no shift).
%0 = load i8, i8* %data, align 1
%conv = zext i8 %0 to i32
; Byte 1: bits 8..15.
%arrayidx1 = getelementptr inbounds i8, i8* %data, i64 1
%1 = load i8, i8* %arrayidx1, align 1
%conv2 = zext i8 %1 to i32
%shl3 = shl nuw nsw i32 %conv2, 8
%or = or i32 %shl3, %conv
; Byte 2: bits 16..23.
%arrayidx4 = getelementptr inbounds i8, i8* %data, i64 2
%2 = load i8, i8* %arrayidx4, align 1
%conv5 = zext i8 %2 to i32
%shl6 = shl nuw nsw i32 %conv5, 16
%or7 = or i32 %or, %shl6
; Byte 3: bits 24..31; this shift carries only nuw (no nsw), unlike the
; two shifts above.
%arrayidx8 = getelementptr inbounds i8, i8* %data, i64 3
%3 = load i8, i8* %arrayidx8, align 1
%conv9 = zext i8 %3 to i32
%shl10 = shl nuw i32 %conv9, 24
%or11 = or i32 %or7, %shl10
ret i32 %or11
}
; Loads elements 0, 1 and 2 of the <4 x float> pointed to by %x as scalars
; and builds the vector <x0, x1, x2, x2> (element 2 is used for lanes 2 and 3).
; Expected output: the loads of elements 0 and 1 become one <2 x float> load
; through a bitcast of %gep0, the load of element 2 stays scalar, and the
; result is rebuilt with insertelements.
define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceable(16) %x) {
; CHECK-LABEL: @PR16739_byref(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[X2:%.*]] = load float, float* [[GEP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[TMP4]], i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[X2]], i32 2
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3
; CHECK-NEXT: ret <4 x float> [[I3]]
;
; Addresses of elements 0..2 of *%x.
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
%gep2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
; Scalar loads carry no explicit alignment in the input.
%x0 = load float, float* %gep0
%x1 = load float, float* %gep1
%x2 = load float, float* %gep2
; Build <x0, x1, x2, x2>; %x2 is inserted twice.
%i0 = insertelement <4 x float> poison, float %x0, i32 0
%i1 = insertelement <4 x float> %i0, float %x1, i32 1
%i2 = insertelement <4 x float> %i1, float %x2, i32 2
%i3 = insertelement <4 x float> %i2, float %x2, i32 3
ret <4 x float> %i3
}
; Variant of @PR16739_byref that loads only elements 0 and 1 of *%x and
; builds the vector <x0, x0, x1, x1> (each loaded value fills two lanes).
; Expected output: one <2 x float> load through a bitcast of %gep0, a
; shufflevector with mask <0, 0, 1, 1> that duplicates the two values, and
; extractelement/insertelement pairs that rebuild the result.
define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) {
; CHECK-LABEL: @PR16739_byref_alt(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[I3]]
;
; Addresses and scalar loads of elements 0 and 1 of *%x.
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
%x0 = load float, float* %gep0
%x1 = load float, float* %gep1
; Build <x0, x0, x1, x1>.
%i0 = insertelement <4 x float> poison, float %x0, i32 0
%i1 = insertelement <4 x float> %i0, float %x0, i32 1
%i2 = insertelement <4 x float> %i1, float %x1, i32 2
%i3 = insertelement <4 x float> %i2, float %x1, i32 3
ret <4 x float> %i3
}
; Reads *%x as two i64 values (elements 0-1 and elements 2-3) and splits
; them back into floats with trunc/lshr/bitcast before building the result.
; Lanes 2 and 3 both receive %t13, the low 32 bits of the second i64.
; Expected output: the same instruction sequence as the input; no vector
; load or vector arithmetic appears in the assertions below.
define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceable(16) %x) {
; CHECK-LABEL: @PR16739_byval(
; CHECK-NEXT: [[T0:%.*]] = bitcast <4 x float>* [[X:%.*]] to i64*
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 16
; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
; CHECK-NEXT: [[T3:%.*]] = bitcast float* [[T2]] to i64*
; CHECK-NEXT: [[T4:%.*]] = load i64, i64* [[T3]], align 8
; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T1]] to i32
; CHECK-NEXT: [[T6:%.*]] = bitcast i32 [[T5]] to float
; CHECK-NEXT: [[T7:%.*]] = insertelement <4 x float> poison, float [[T6]], i32 0
; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32
; CHECK-NEXT: [[T9:%.*]] = trunc i64 [[T8]] to i32
; CHECK-NEXT: [[T10:%.*]] = bitcast i32 [[T9]] to float
; CHECK-NEXT: [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[T10]], i32 1
; CHECK-NEXT: [[T12:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT: [[T13:%.*]] = bitcast i32 [[T12]] to float
; CHECK-NEXT: [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2
; CHECK-NEXT: [[T15:%.*]] = insertelement <4 x float> [[T14]], float [[T13]], i32 3
; CHECK-NEXT: ret <4 x float> [[T15]]
;
; First i64: covers elements 0 and 1 of *%x.
%t0 = bitcast <4 x float>* %x to i64*
%t1 = load i64, i64* %t0, align 16
; Second i64: starts at element 2 of *%x.
%t2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
%t3 = bitcast float* %t2 to i64*
%t4 = load i64, i64* %t3, align 8
; Lane 0: low 32 bits of the first i64.
%t5 = trunc i64 %t1 to i32
%t6 = bitcast i32 %t5 to float
%t7 = insertelement <4 x float> poison, float %t6, i32 0
; Lane 1: high 32 bits of the first i64.
%t8 = lshr i64 %t1, 32
%t9 = trunc i64 %t8 to i32
%t10 = bitcast i32 %t9 to float
%t11 = insertelement <4 x float> %t7, float %t10, i32 1
; Lanes 2 and 3: low 32 bits of the second i64, inserted twice.
%t12 = trunc i64 %t4 to i32
%t13 = bitcast i32 %t12 to float
%t14 = insertelement <4 x float> %t11, float %t13, i32 2
%t15 = insertelement <4 x float> %t14, float %t13, i32 3
ret <4 x float> %t15
}
; Four consecutive i64 values are loaded from %p and from %q (align 2),
; subtracted pairwise with nsw, and each difference indexes a GEP on %r.
; The function carries attribute group #0, which sets
; "prefer-vector-width"="128".
; Expected output: the loads and subs are done as two <2 x i64> halves per
; operand (not one <4 x i64>), and the lanes are extracted again to feed the
; four scalar GEPs.
define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 {
; CHECK-LABEL: @PR43578_prefer128(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0
; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 1
; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2
; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[P2]] to <2 x i64>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[Q0]] to <2 x i64>*
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 2
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[Q2]] to <2 x i64>*
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 2
; CHECK-NEXT: [[TMP9:%.*]] = sub nsw <2 x i64> [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, i32* [[R:%.*]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP14]]
; CHECK-NEXT: ret void
;
; Addresses of p[0..3] and q[0..3].
%p0 = getelementptr inbounds i64, i64* %p, i64 0
%p1 = getelementptr inbounds i64, i64* %p, i64 1
%p2 = getelementptr inbounds i64, i64* %p, i64 2
%p3 = getelementptr inbounds i64, i64* %p, i64 3
%q0 = getelementptr inbounds i64, i64* %q, i64 0
%q1 = getelementptr inbounds i64, i64* %q, i64 1
%q2 = getelementptr inbounds i64, i64* %q, i64 2
%q3 = getelementptr inbounds i64, i64* %q, i64 3
; Scalar loads; all use align 2, which the expected vector loads keep.
%x0 = load i64, i64* %p0, align 2
%x1 = load i64, i64* %p1, align 2
%x2 = load i64, i64* %p2, align 2
%x3 = load i64, i64* %p3, align 2
%y0 = load i64, i64* %q0, align 2
%y1 = load i64, i64* %q1, align 2
%y2 = load i64, i64* %q2, align 2
%y3 = load i64, i64* %q3, align 2
; Pairwise differences p[i] - q[i].
%sub0 = sub nsw i64 %x0, %y0
%sub1 = sub nsw i64 %x1, %y1
%sub2 = sub nsw i64 %x2, %y2
%sub3 = sub nsw i64 %x3, %y3
; The differences are only used as GEP indices; the GEP results themselves
; have no users in this function.
%g0 = getelementptr inbounds i32, i32* %r, i64 %sub0
%g1 = getelementptr inbounds i32, i32* %r, i64 %sub1
%g2 = getelementptr inbounds i32, i32* %r, i64 %sub2
%g3 = getelementptr inbounds i32, i32* %r, i64 %sub3
ret void
}
; Attribute group referenced by @PR43578_prefer128 above.
attributes #0 = { "prefer-vector-width"="128" }

View File

@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; The source vector has only lane 0 written (with float undef) on top of a
; poison placeholder. Each lane is extracted twice and subtracted from
; itself, and the two differences are packed into the returned vector.
; Expected output: one <2 x float> fsub of the source vector with itself,
; followed by extractelement/insertelement pairs that rebuild the result.
define <2 x float> @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SOURCE:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x float> [[SOURCE]], [[SOURCE]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <2 x float> [[RES1]], float [[TMP2]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES2]]
;
entry:
%source = insertelement <2 x float> poison, float undef, i32 0
; Lane 0 extracted twice: both fsub operands come from the same element.
%e0 = extractelement <2 x float> %source, i32 0
%e0.dup = extractelement <2 x float> %source, i32 0
%sub1 = fsub float %e0, %e0.dup
; Lane 1 extracted twice; this lane was never written by an insertelement.
%e1 = extractelement <2 x float> %source, i32 1
%e1.dup = extractelement <2 x float> %source, i32 1
%sub2 = fsub float %e1, %e1.dup
%res1 = insertelement <2 x float> poison, float %sub1, i32 0
%res2 = insertelement <2 x float> %res1, float %sub2, i32 1
ret <2 x float> %res2
}
; Module identification metadata; !llvm.ident lists node !0 twice.
!llvm.ident = !{!0, !0}
!0 = !{!"clang version 4.0.0 "}

View File

@ -0,0 +1,278 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
; Checks that vector insertvalues into the struct become SLP seeds.
;
; Four consecutive floats are loaded from %Ptr, each gets a distinct
; constant added with the fast flag, and the sums are packed into two
; <2 x float> vectors that are inserted into the returned struct.
; Expected output: one <4 x float> load and one <4 x float> fadd fast, with
; the two <2 x float> vectors rebuilt from extracted lanes.
define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
; CHECK-LABEL: @StructOfVectors(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
;
; Scalar loads of Ptr[0..3].
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
%L0 = load float, float * %GEP0
%GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
%L1 = load float, float * %GEP1
%GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
%L2 = load float, float * %GEP2
%GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
%L3 = load float, float * %GEP3
; One distinct constant per element (11.0 .. 14.0).
%Fadd0 = fadd fast float %L0, 1.1e+01
%Fadd1 = fadd fast float %L1, 1.2e+01
%Fadd2 = fadd fast float %L2, 1.3e+01
%Fadd3 = fadd fast float %L3, 1.4e+01
; Two independent <2 x float> build vectors, each starting from poison.
%VecIn0 = insertelement <2 x float> poison, float %Fadd0, i64 0
%VecIn1 = insertelement <2 x float> %VecIn0, float %Fadd1, i64 1
%VecIn2 = insertelement <2 x float> poison, float %Fadd2, i64 0
%VecIn3 = insertelement <2 x float> %VecIn2, float %Fadd3, i64 1
; The struct itself is still built on an undef aggregate.
%Ret0 = insertvalue {<2 x float>, <2 x float>} undef, <2 x float> %VecIn1, 0
%Ret1 = insertvalue {<2 x float>, <2 x float>} %Ret0, <2 x float> %VecIn3, 1
ret {<2 x float>, <2 x float>} %Ret1
}
; Two-float struct used as the element type by the aggregate tests below.
%StructTy = type { float, float}
; Four consecutive floats are loaded, each gets a distinct constant added
; with the fast flag, and the sums are packed into two %StructTy values that
; form the returned [2 x %StructTy] array.
; Expected output: one <4 x float> load and one <4 x float> fadd fast, with
; extracted lanes feeding the unchanged insertvalue chain.
define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
; CHECK-LABEL: @ArrayOfStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCTTY]] [[STRUCTIN0]], float [[TMP5]], 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCTTY]] undef, float [[TMP6]], 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCTTY]] [[STRUCTIN2]], float [[TMP7]], 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue [2 x %StructTy] undef, [[STRUCTTY]] [[STRUCTIN1]], 0
; CHECK-NEXT: [[RET1:%.*]] = insertvalue [2 x %StructTy] [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
; CHECK-NEXT: ret [2 x %StructTy] [[RET1]]
;
; Scalar loads of Ptr[0..3].
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
%L0 = load float, float * %GEP0
%GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
%L1 = load float, float * %GEP1
%GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
%L2 = load float, float * %GEP2
%GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
%L3 = load float, float * %GEP3
; One distinct constant per element (11.0 .. 14.0).
%Fadd0 = fadd fast float %L0, 1.1e+01
%Fadd1 = fadd fast float %L1, 1.2e+01
%Fadd2 = fadd fast float %L2, 1.3e+01
%Fadd3 = fadd fast float %L3, 1.4e+01
; Inner structs: (Fadd0, Fadd1) and (Fadd2, Fadd3).
%StructIn0 = insertvalue %StructTy undef, float %Fadd0, 0
%StructIn1 = insertvalue %StructTy %StructIn0, float %Fadd1, 1
%StructIn2 = insertvalue %StructTy undef, float %Fadd2, 0
%StructIn3 = insertvalue %StructTy %StructIn2, float %Fadd3, 1
; Outer array of the two inner structs.
%Ret0 = insertvalue [2 x %StructTy] undef, %StructTy %StructIn1, 0
%Ret1 = insertvalue [2 x %StructTy] %Ret0, %StructTy %StructIn3, 1
ret [2 x %StructTy] %Ret1
}
; Same computation as @ArrayOfStruct, but the two %StructTy values are
; returned in an anonymous struct {%StructTy, %StructTy} instead of an array.
; Expected output: one <4 x float> load and one <4 x float> fadd fast, with
; extracted lanes feeding the unchanged insertvalue chain.
define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
; CHECK-LABEL: @StructOfStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCTTY]] [[STRUCTIN0]], float [[TMP5]], 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCTTY]] undef, float [[TMP6]], 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCTTY]] [[STRUCTIN2]], float [[TMP7]], 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { [[STRUCTTY]], [[STRUCTTY]] } undef, [[STRUCTTY]] [[STRUCTIN1]], 0
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { [[STRUCTTY]], [[STRUCTTY]] } [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
; CHECK-NEXT: ret { [[STRUCTTY]], [[STRUCTTY]] } [[RET1]]
;
; Scalar loads of Ptr[0..3].
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
%L0 = load float, float * %GEP0
%GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
%L1 = load float, float * %GEP1
%GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
%L2 = load float, float * %GEP2
%GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
%L3 = load float, float * %GEP3
; One distinct constant per element (11.0 .. 14.0).
%Fadd0 = fadd fast float %L0, 1.1e+01
%Fadd1 = fadd fast float %L1, 1.2e+01
%Fadd2 = fadd fast float %L2, 1.3e+01
%Fadd3 = fadd fast float %L3, 1.4e+01
; Inner structs: (Fadd0, Fadd1) and (Fadd2, Fadd3).
%StructIn0 = insertvalue %StructTy undef, float %Fadd0, 0
%StructIn1 = insertvalue %StructTy %StructIn0, float %Fadd1, 1
%StructIn2 = insertvalue %StructTy undef, float %Fadd2, 0
%StructIn3 = insertvalue %StructTy %StructIn2, float %Fadd3, 1
; Outer struct of the two inner structs.
%Ret0 = insertvalue {%StructTy, %StructTy} undef, %StructTy %StructIn1, 0
%Ret1 = insertvalue {%StructTy, %StructTy} %Ret0, %StructTy %StructIn3, 1
ret {%StructTy, %StructTy} %Ret1
}
; Same loads and adds as the tests above, but the result aggregate mixes
; nesting levels: field 0 is a %StructTy holding the first two sums, and
; fields 1 and 2 are plain floats holding the last two sums.
; Expected output: the scalar loads and scalar fadds are kept as they are;
; no vector instruction appears in the assertions below.
define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
; CHECK-LABEL: @NonHomogeneousStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]], align 4
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]], align 4
; CHECK-NEXT: [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01
; CHECK-NEXT: [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01
; CHECK-NEXT: [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01
; CHECK-NEXT: [[FADD3:%.*]] = fadd fast float [[L3]], 1.400000e+01
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[FADD0]], 0
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCTTY]] [[STRUCTIN0]], float [[FADD1]], 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { [[STRUCTTY]], float, float } undef, [[STRUCTTY]] [[STRUCTIN1]], 0
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { [[STRUCTTY]], float, float } [[RET0]], float [[FADD2]], 1
; CHECK-NEXT: [[RET2:%.*]] = insertvalue { [[STRUCTTY]], float, float } [[RET1]], float [[FADD3]], 2
; CHECK-NEXT: ret { [[STRUCTTY]], float, float } [[RET2]]
;
; Scalar loads of Ptr[0..3].
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
%L0 = load float, float * %GEP0
%GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
%L1 = load float, float * %GEP1
%GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
%L2 = load float, float * %GEP2
%GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
%L3 = load float, float * %GEP3
; One distinct constant per element (11.0 .. 14.0).
%Fadd0 = fadd fast float %L0, 1.1e+01
%Fadd1 = fadd fast float %L1, 1.2e+01
%Fadd2 = fadd fast float %L2, 1.3e+01
%Fadd3 = fadd fast float %L3, 1.4e+01
; Only the first two sums go through a nested %StructTy.
%StructIn0 = insertvalue %StructTy undef, float %Fadd0, 0
%StructIn1 = insertvalue %StructTy %StructIn0, float %Fadd1, 1
; The last two sums are inserted directly as top-level float fields.
%Ret0 = insertvalue {%StructTy, float, float} undef, %StructTy %StructIn1, 0
%Ret1 = insertvalue {%StructTy, float, float} %Ret0, float %Fadd2, 1
%Ret2 = insertvalue {%StructTy, float, float} %Ret1, float %Fadd3, 2
ret {%StructTy, float, float} %Ret2
}
; Two-level nesting: %Struct1Ty holds two i16 values and %Struct2Ty holds
; two %Struct1Ty values.
%Struct1Ty = type { i16, i16 }
%Struct2Ty = type { %Struct1Ty, %Struct1Ty}
; Eight consecutive i16 values are loaded from %Ptr, each gets a distinct
; constant (1..8) added, and the sums are packed pairwise into four
; %Struct1Ty, then two %Struct2Ty, then the returned outer struct.
; Expected output: one <8 x i16> load and one <8 x i16> add, with extracted
; lanes feeding the unchanged insertvalue chain.
define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
; CHECK-LABEL: @StructOfStructOfStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN0]], i16 [[TMP5]], 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP6]], 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN2]], i16 [[TMP7]], 1
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[STRUCTIN4:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP8]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[STRUCTIN5:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN4]], i16 [[TMP9]], 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[STRUCTIN6:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP10]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[STRUCTIN7:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN6]], i16 [[TMP11]], 1
; CHECK-NEXT: [[STRUCT2IN0:%.*]] = insertvalue [[STRUCT2TY:%.*]] undef, [[STRUCT1TY]] [[STRUCTIN1]], 0
; CHECK-NEXT: [[STRUCT2IN1:%.*]] = insertvalue [[STRUCT2TY]] [[STRUCT2IN0]], [[STRUCT1TY]] [[STRUCTIN3]], 1
; CHECK-NEXT: [[STRUCT2IN2:%.*]] = insertvalue [[STRUCT2TY]] undef, [[STRUCT1TY]] [[STRUCTIN5]], 0
; CHECK-NEXT: [[STRUCT2IN3:%.*]] = insertvalue [[STRUCT2TY]] [[STRUCT2IN2]], [[STRUCT1TY]] [[STRUCTIN7]], 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } undef, [[STRUCT2TY]] [[STRUCT2IN1]], 0
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN3]], 1
; CHECK-NEXT: ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
;
; Scalar loads of Ptr[0..7].
%GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
%L0 = load i16, i16 * %GEP0
%GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
%L1 = load i16, i16 * %GEP1
%GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
%L2 = load i16, i16 * %GEP2
%GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
%L3 = load i16, i16 * %GEP3
%GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
%L4 = load i16, i16 * %GEP4
%GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
%L5 = load i16, i16 * %GEP5
%GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
%L6 = load i16, i16 * %GEP6
%GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
%L7 = load i16, i16 * %GEP7
; Despite the %Fadd names, these are integer adds of the constants 1..8.
%Fadd0 = add i16 %L0, 1
%Fadd1 = add i16 %L1, 2
%Fadd2 = add i16 %L2, 3
%Fadd3 = add i16 %L3, 4
%Fadd4 = add i16 %L4, 5
%Fadd5 = add i16 %L5, 6
%Fadd6 = add i16 %L6, 7
%Fadd7 = add i16 %L7, 8
; Innermost level: four %Struct1Ty values, each built on undef.
%StructIn0 = insertvalue %Struct1Ty undef, i16 %Fadd0, 0
%StructIn1 = insertvalue %Struct1Ty %StructIn0, i16 %Fadd1, 1
%StructIn2 = insertvalue %Struct1Ty undef, i16 %Fadd2, 0
%StructIn3 = insertvalue %Struct1Ty %StructIn2, i16 %Fadd3, 1
%StructIn4 = insertvalue %Struct1Ty undef, i16 %Fadd4, 0
%StructIn5 = insertvalue %Struct1Ty %StructIn4, i16 %Fadd5, 1
%StructIn6 = insertvalue %Struct1Ty undef, i16 %Fadd6, 0
%StructIn7 = insertvalue %Struct1Ty %StructIn6, i16 %Fadd7, 1
; Middle level: two %Struct2Ty values.
%Struct2In0 = insertvalue %Struct2Ty undef, %Struct1Ty %StructIn1, 0
%Struct2In1 = insertvalue %Struct2Ty %Struct2In0, %Struct1Ty %StructIn3, 1
%Struct2In2 = insertvalue %Struct2Ty undef, %Struct1Ty %StructIn5, 0
%Struct2In3 = insertvalue %Struct2Ty %Struct2In2, %Struct1Ty %StructIn7, 1
; Outer level: the returned pair of %Struct2Ty.
%Ret0 = insertvalue {%Struct2Ty, %Struct2Ty} undef, %Struct2Ty %Struct2In1, 0
%Ret1 = insertvalue {%Struct2Ty, %Struct2Ty} %Ret0, %Struct2Ty %Struct2In3, 1
ret {%Struct2Ty, %Struct2Ty} %Ret1
}

View File

@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; See https://reviews.llvm.org/D83779
; @foo loads a <2 x float> through a bitcast of the nested-struct pointer %A,
; multiplies each lane by 2.0 as a separate scalar fmul, and rebuilds the
; result vector with an insertelement chain whose base operand is poison.
; The autogenerated assertions below expect a single <2 x float> fmul whose
; lanes are extracted again and fed to the same insertelement chain.
define <2 x float> @foo({{float, float}}* %A) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x float> [[INS1]], float [[TMP4]], i32 0
; CHECK-NEXT: ret <2 x float> [[INS0]]
;
entry:
; Reinterpret the { { float, float } } pointer as a pointer to <2 x float>.
%0 = bitcast {{float, float}}* %A to <2 x float>*
%1 = load <2 x float>, <2 x float>* %0
%L0 = extractelement <2 x float> %1, i32 0
%L1 = extractelement <2 x float> %1, i32 1
; Two independent scalar multiplies by the same constant.
%Mul0 = fmul float %L0, 2.000000e+00
%Mul1 = fmul float %L1, 2.000000e+00
; Lane 1 is inserted into the poison vector first, lane 0 second.
%Ins1 = insertelement <2 x float> poison, float %Mul1, i32 1
%Ins0 = insertelement <2 x float> %Ins1, float %Mul0, i32 0
ret <2 x float> %Ins0
}
; Named aggregate types used by @StructOfStructOfStruct:
; %Struct1Ty is a pair of i16 values, %Struct2Ty is a pair of %Struct1Ty.
%Struct1Ty = type { i16, i16 }
%Struct2Ty = type { %Struct1Ty, %Struct1Ty}
; @StructOfStructOfStruct loads eight consecutive i16 values from %Ptr
; (element offsets 0 through 7), adds the constants 1 through 8 to them
; respectively, and packs the eight results into the returned
; { %Struct2Ty, %Struct2Ty } aggregate using insertvalue chains.
; Several chains insert their fields in reverse index order (see the notes in
; the body). The autogenerated assertions below expect one <8 x i16> load and
; one <8 x i16> add, with extractelement results feeding the insertvalue
; chains.
define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
; CHECK-LABEL: @StructOfStructOfStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN0]], i16 [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP6]], 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN2]], i16 [[TMP7]], 1
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[STRUCTIN4:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP8]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[STRUCTIN5:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN4]], i16 [[TMP9]], 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[STRUCTIN6:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP10]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[STRUCTIN7:%.*]] = insertvalue [[STRUCT1TY]] [[STRUCTIN6]], i16 [[TMP11]], 0
; CHECK-NEXT: [[STRUCT2IN0:%.*]] = insertvalue [[STRUCT2TY:%.*]] undef, [[STRUCT1TY]] [[STRUCTIN1]], 0
; CHECK-NEXT: [[STRUCT2IN1:%.*]] = insertvalue [[STRUCT2TY]] [[STRUCT2IN0]], [[STRUCT1TY]] [[STRUCTIN3]], 1
; CHECK-NEXT: [[STRUCT2IN2:%.*]] = insertvalue [[STRUCT2TY]] undef, [[STRUCT1TY]] [[STRUCTIN5]], 0
; CHECK-NEXT: [[STRUCT2IN3:%.*]] = insertvalue [[STRUCT2TY]] [[STRUCT2IN2]], [[STRUCT1TY]] [[STRUCTIN7]], 1
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } undef, [[STRUCT2TY]] [[STRUCT2IN3]], 1
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN1]], 0
; CHECK-NEXT: ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
;
; Eight scalar loads from consecutive i16 slots %Ptr[0] .. %Ptr[7].
%GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
%L0 = load i16, i16 * %GEP0
%GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
%L1 = load i16, i16 * %GEP1
%GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
%L2 = load i16, i16 * %GEP2
%GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
%L3 = load i16, i16 * %GEP3
%GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
%L4 = load i16, i16 * %GEP4
%GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
%L5 = load i16, i16 * %GEP5
%GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
%L6 = load i16, i16 * %GEP6
%GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
%L7 = load i16, i16 * %GEP7
; Despite the "Fadd" names these are integer adds; element k gets k + 1 added.
%Fadd0 = add i16 %L0, 1
%Fadd1 = add i16 %L1, 2
%Fadd2 = add i16 %L2, 3
%Fadd3 = add i16 %L3, 4
%Fadd4 = add i16 %L4, 5
%Fadd5 = add i16 %L5, 6
%Fadd6 = add i16 %L6, 7
%Fadd7 = add i16 %L7, 8
; First %Struct1Ty: field 1 (%Fadd1) is inserted before field 0 (%Fadd0).
%StructIn0 = insertvalue %Struct1Ty undef, i16 %Fadd1, 1
%StructIn1 = insertvalue %Struct1Ty %StructIn0, i16 %Fadd0, 0
; Second and third %Struct1Ty: fields inserted in ascending index order.
%StructIn2 = insertvalue %Struct1Ty undef, i16 %Fadd2, 0
%StructIn3 = insertvalue %Struct1Ty %StructIn2, i16 %Fadd3, 1
%StructIn4 = insertvalue %Struct1Ty undef, i16 %Fadd4, 0
%StructIn5 = insertvalue %Struct1Ty %StructIn4, i16 %Fadd5, 1
; Fourth %Struct1Ty: again field 1 (%Fadd7) first, then field 0 (%Fadd6).
%StructIn6 = insertvalue %Struct1Ty undef, i16 %Fadd7, 1
%StructIn7 = insertvalue %Struct1Ty %StructIn6, i16 %Fadd6, 0
; Pair the four %Struct1Ty values into two %Struct2Ty values.
%Struct2In0 = insertvalue %Struct2Ty undef, %Struct1Ty %StructIn1, 0
%Struct2In1 = insertvalue %Struct2Ty %Struct2In0, %Struct1Ty %StructIn3, 1
%Struct2In2 = insertvalue %Struct2Ty undef, %Struct1Ty %StructIn5, 0
%Struct2In3 = insertvalue %Struct2Ty %Struct2In2, %Struct1Ty %StructIn7, 1
; Outer aggregate: element 1 (%Struct2In3) is inserted before element 0.
%Ret0 = insertvalue {%Struct2Ty, %Struct2Ty} undef, %Struct2Ty %Struct2In3, 1
%Ret1 = insertvalue {%Struct2Ty, %Struct2Ty} %Ret0, %Struct2Ty %Struct2In1, 0
ret {%Struct2Ty, %Struct2Ty} %Ret1
}

View File

@ -0,0 +1,664 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
; @gather_load reads four i32 values from non-consecutive offsets of %1
; (0, 11, 4 and 1), assembles them into a <4 x i32> with an insertelement chain
; whose base operand is poison, adds <1, 2, 3, 4> with nsw, and writes the
; vector to %0 through a bitcast pointer.
; The autogenerated assertions shared by all RUN lines expect the four scalar
; loads and the insertelement chain to remain, followed by the vector add and
; the vector store.
define void @gather_load(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
; CHECK-LABEL: @gather_load(
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, [[TBAA0:!tbaa !.*]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, [[TBAA0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, [[TBAA0]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, [[TBAA0]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, [[TBAA0]]
; CHECK-NEXT: ret void
;
; NOTE: the getelementptrs into %0 below (%5, %8, %11) have no users in this
; function; they do not appear in the expected output above.
%3 = getelementptr inbounds i32, i32* %1, i64 1
%4 = load i32, i32* %1, align 4, !tbaa !2
%5 = getelementptr inbounds i32, i32* %0, i64 1
%6 = getelementptr inbounds i32, i32* %1, i64 11
%7 = load i32, i32* %6, align 4, !tbaa !2
%8 = getelementptr inbounds i32, i32* %0, i64 2
%9 = getelementptr inbounds i32, i32* %1, i64 4
%10 = load i32, i32* %9, align 4, !tbaa !2
%11 = getelementptr inbounds i32, i32* %0, i64 3
; The load through %3 (offset 1) is issued last and lands in lane 3.
%12 = load i32, i32* %3, align 4, !tbaa !2
; Build the vector lane by lane, starting from a poison placeholder.
%13 = insertelement <4 x i32> poison, i32 %4, i32 0
%14 = insertelement <4 x i32> %13, i32 %7, i32 1
%15 = insertelement <4 x i32> %14, i32 %10, i32 2
%16 = insertelement <4 x i32> %15, i32 %12, i32 3
%17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
%18 = bitcast i32* %0 to <4 x i32>*
store <4 x i32> %17, <4 x i32>* %18, align 4, !tbaa !2
ret void
}
; @gather_load_2 is a fully scalar sequence: it loads i32 values from offsets
; 1, 10, 3 and 5 of %1, adds 1, 2, 3 and 4 (nsw) respectively, and stores the
; results to the consecutive slots %0[0] .. %0[3]. Each store is interleaved
; with the next load.
; Per the autogenerated assertions below, the sse2 and avx RUN lines expect the
; scalar code to be kept unchanged, while the avx2 and avx512 RUN lines expect
; a 4-wide llvm.masked.gather over the offsets <1, 10, 3, 5>, a vector add and
; a single vector store.
define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
; SSE-LABEL: @gather_load_2(
; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
; SSE-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, [[TBAA0:!tbaa !.*]]
; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; SSE-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
; SSE-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
; SSE-NEXT: store i32 [[TMP9]], i32* [[TMP6]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
; SSE-NEXT: store i32 [[TMP13]], i32* [[TMP10]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
; SSE-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
; SSE-NEXT: store i32 [[TMP17]], i32* [[TMP14]], align 4, [[TBAA0]]
; SSE-NEXT: ret void
;
; AVX-LABEL: @gather_load_2(
; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
; AVX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, [[TBAA0:!tbaa !.*]]
; AVX-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
; AVX-NEXT: store i32 [[TMP9]], i32* [[TMP6]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
; AVX-NEXT: store i32 [[TMP13]], i32* [[TMP10]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
; AVX-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
; AVX-NEXT: store i32 [[TMP17]], i32* [[TMP14]], align 4, [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_2(
; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
; AVX2-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4>
; AVX2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
; AVX2-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4, [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_2(
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1:%.*]], i32 0
; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x i32*> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
; AVX512-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0:!tbaa !.*]]
; AVX512-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4>
; AVX512-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
; AVX512-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4, [[TBAA0]]
; AVX512-NEXT: ret void
;
; Element 0: %0[0] = %1[1] + 1
%3 = getelementptr inbounds i32, i32* %1, i64 1
%4 = load i32, i32* %3, align 4, !tbaa !2
%5 = add nsw i32 %4, 1
%6 = getelementptr inbounds i32, i32* %0, i64 1
store i32 %5, i32* %0, align 4, !tbaa !2
; Element 1: %0[1] = %1[10] + 2
%7 = getelementptr inbounds i32, i32* %1, i64 10
%8 = load i32, i32* %7, align 4, !tbaa !2
%9 = add nsw i32 %8, 2
%10 = getelementptr inbounds i32, i32* %0, i64 2
store i32 %9, i32* %6, align 4, !tbaa !2
; Element 2: %0[2] = %1[3] + 3
%11 = getelementptr inbounds i32, i32* %1, i64 3
%12 = load i32, i32* %11, align 4, !tbaa !2
%13 = add nsw i32 %12, 3
%14 = getelementptr inbounds i32, i32* %0, i64 3
store i32 %13, i32* %10, align 4, !tbaa !2
; Element 3: %0[3] = %1[5] + 4
%15 = getelementptr inbounds i32, i32* %1, i64 5
%16 = load i32, i32* %15, align 4, !tbaa !2
%17 = add nsw i32 %16, 4
store i32 %17, i32* %14, align 4, !tbaa !2
ret void
}
; @gather_load_3 is the eight-element variant of the scalar load/add/store
; sequence: it loads i32 values from offsets 0, 11, 4, 15, 18, 9, 6 and 21 of
; %1, adds the repeating constants 1, 2, 3, 4, 1, 2, 3, 4 (plain add, no nsw),
; and stores the results to the consecutive slots %0[0] .. %0[7]. Each store is
; interleaved with the next load.
; Per the autogenerated assertions below, the sse2 and avx RUN lines expect the
; scalar code to be kept unchanged. The avx2 and avx512 RUN lines expect only
; the four elements loaded from offsets <11, 4, 15, 18> to become a 4-wide
; llvm.masked.gather with a vector add and a vector store starting at %0[1];
; the remaining four elements stay scalar.
define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
; SSE-LABEL: @gather_load_3(
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; SSE-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
; SSE-NEXT: store i32 [[TMP8]], i32* [[TMP5]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
; SSE-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
; SSE-NEXT: store i32 [[TMP12]], i32* [[TMP9]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
; SSE-NEXT: store i32 [[TMP16]], i32* [[TMP13]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
; SSE-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
; SSE-NEXT: store i32 [[TMP20]], i32* [[TMP17]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
; SSE-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
; SSE-NEXT: store i32 [[TMP24]], i32* [[TMP21]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
; SSE-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
; SSE-NEXT: store i32 [[TMP28]], i32* [[TMP25]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
; SSE-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
; SSE-NEXT: store i32 [[TMP32]], i32* [[TMP29]], align 4, [[TBAA0]]
; SSE-NEXT: ret void
;
; AVX-LABEL: @gather_load_3(
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
; AVX-NEXT: store i32 [[TMP8]], i32* [[TMP5]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
; AVX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
; AVX-NEXT: store i32 [[TMP12]], i32* [[TMP9]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
; AVX-NEXT: store i32 [[TMP16]], i32* [[TMP13]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
; AVX-NEXT: store i32 [[TMP20]], i32* [[TMP17]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
; AVX-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
; AVX-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
; AVX-NEXT: store i32 [[TMP24]], i32* [[TMP21]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
; AVX-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
; AVX-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
; AVX-NEXT: store i32 [[TMP28]], i32* [[TMP25]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
; AVX-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, [[TBAA0]]
; AVX-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
; AVX-NEXT: store i32 [[TMP32]], i32* [[TMP29]], align 4, [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_3(
; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX2-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
; AVX2-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], <i32 2, i32 3, i32 4, i32 1>
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
; AVX2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; AVX2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
; AVX2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 2
; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
; AVX2-NEXT: store i32 [[TMP15]], i32* [[TMP11]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 3
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
; AVX2-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 4
; AVX2-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 4, [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_3(
; AVX512-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
; AVX512-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP6:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr i32, <4 x i32*> [[TMP7]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP8]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
; AVX512-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], <i32 2, i32 3, i32 4, i32 1>
; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
; AVX512-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; AVX512-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
; AVX512-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 2
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
; AVX512-NEXT: store i32 [[TMP15]], i32* [[TMP11]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
; AVX512-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 3
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
; AVX512-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
; AVX512-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 4
; AVX512-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 4, [[TBAA0]]
; AVX512-NEXT: ret void
;
; Element 0: %0[0] = %1[0] + 1
%3 = load i32, i32* %1, align 4, !tbaa !2
%4 = add i32 %3, 1
%5 = getelementptr inbounds i32, i32* %0, i64 1
store i32 %4, i32* %0, align 4, !tbaa !2
; Element 1: %0[1] = %1[11] + 2
%6 = getelementptr inbounds i32, i32* %1, i64 11
%7 = load i32, i32* %6, align 4, !tbaa !2
%8 = add i32 %7, 2
%9 = getelementptr inbounds i32, i32* %0, i64 2
store i32 %8, i32* %5, align 4, !tbaa !2
; Element 2: %0[2] = %1[4] + 3
%10 = getelementptr inbounds i32, i32* %1, i64 4
%11 = load i32, i32* %10, align 4, !tbaa !2
%12 = add i32 %11, 3
%13 = getelementptr inbounds i32, i32* %0, i64 3
store i32 %12, i32* %9, align 4, !tbaa !2
; Element 3: %0[3] = %1[15] + 4
%14 = getelementptr inbounds i32, i32* %1, i64 15
%15 = load i32, i32* %14, align 4, !tbaa !2
%16 = add i32 %15, 4
%17 = getelementptr inbounds i32, i32* %0, i64 4
store i32 %16, i32* %13, align 4, !tbaa !2
; Element 4: %0[4] = %1[18] + 1 (the added constants start over at 1)
%18 = getelementptr inbounds i32, i32* %1, i64 18
%19 = load i32, i32* %18, align 4, !tbaa !2
%20 = add i32 %19, 1
%21 = getelementptr inbounds i32, i32* %0, i64 5
store i32 %20, i32* %17, align 4, !tbaa !2
; Element 5: %0[5] = %1[9] + 2
%22 = getelementptr inbounds i32, i32* %1, i64 9
%23 = load i32, i32* %22, align 4, !tbaa !2
%24 = add i32 %23, 2
%25 = getelementptr inbounds i32, i32* %0, i64 6
store i32 %24, i32* %21, align 4, !tbaa !2
; Element 6: %0[6] = %1[6] + 3
%26 = getelementptr inbounds i32, i32* %1, i64 6
%27 = load i32, i32* %26, align 4, !tbaa !2
%28 = add i32 %27, 3
%29 = getelementptr inbounds i32, i32* %0, i64 7
store i32 %28, i32* %25, align 4, !tbaa !2
; Element 7: %0[7] = %1[21] + 4
%30 = getelementptr inbounds i32, i32* %1, i64 21
%31 = load i32, i32* %30, align 4, !tbaa !2
%32 = add i32 %31, 4
store i32 %32, i32* %29, align 4, !tbaa !2
ret void
}
define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture readonly %t1) {
; SSE-LABEL: @gather_load_4(
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2
; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3
; SSE-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
; SSE-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4
; SSE-NEXT: [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
; SSE-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
; SSE-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
; SSE-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
; SSE-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, [[TBAA0]]
; SSE-NEXT: [[T7:%.*]] = load i32, i32* [[T6]], align 4, [[TBAA0]]
; SSE-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4, [[TBAA0]]
; SSE-NEXT: [[T15:%.*]] = load i32, i32* [[T14]], align 4, [[TBAA0]]
; SSE-NEXT: [[T19:%.*]] = load i32, i32* [[T18]], align 4, [[TBAA0]]
; SSE-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, [[TBAA0]]
; SSE-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, [[TBAA0]]
; SSE-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, [[TBAA0]]
; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
; SSE-NEXT: [[T16:%.*]] = add i32 [[T15]], 4
; SSE-NEXT: [[T20:%.*]] = add i32 [[T19]], 1
; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
; SSE-NEXT: store i32 [[T4]], i32* [[T0]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T8]], i32* [[T5]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T12]], i32* [[T9]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T16]], i32* [[T13]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T20]], i32* [[T17]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T24]], i32* [[T21]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T28]], i32* [[T25]], align 4, [[TBAA0]]
; SSE-NEXT: store i32 [[T32]], i32* [[T29]], align 4, [[TBAA0]]
; SSE-NEXT: ret void
;
; AVX-LABEL: @gather_load_4(
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
; AVX-NEXT: [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2
; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
; AVX-NEXT: [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3
; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
; AVX-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4
; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
; AVX-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
; AVX-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
; AVX-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
; AVX-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, [[TBAA0]]
; AVX-NEXT: [[T7:%.*]] = load i32, i32* [[T6]], align 4, [[TBAA0]]
; AVX-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4, [[TBAA0]]
; AVX-NEXT: [[T15:%.*]] = load i32, i32* [[T14]], align 4, [[TBAA0]]
; AVX-NEXT: [[T19:%.*]] = load i32, i32* [[T18]], align 4, [[TBAA0]]
; AVX-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, [[TBAA0]]
; AVX-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, [[TBAA0]]
; AVX-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, [[TBAA0]]
; AVX-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; AVX-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
; AVX-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
; AVX-NEXT: [[T16:%.*]] = add i32 [[T15]], 4
; AVX-NEXT: [[T20:%.*]] = add i32 [[T19]], 1
; AVX-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; AVX-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; AVX-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
; AVX-NEXT: store i32 [[T4]], i32* [[T0]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T8]], i32* [[T5]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T12]], i32* [[T9]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T16]], i32* [[T13]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T20]], i32* [[T17]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T24]], i32* [[T21]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T28]], i32* [[T25]], align 4, [[TBAA0]]
; AVX-NEXT: store i32 [[T32]], i32* [[T29]], align 4, [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_4(
; AVX2-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX2-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
; AVX2-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
; AVX2-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
; AVX2-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
; AVX2-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
; AVX2-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
; AVX2-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
; AVX2-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, [[TBAA0]]
; AVX2-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, [[TBAA0]]
; AVX2-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, [[TBAA0]]
; AVX2-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; AVX2-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 2, i32 3, i32 4, i32 1>
; AVX2-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; AVX2-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; AVX2-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
; AVX2-NEXT: store i32 [[T4]], i32* [[T0]], align 4, [[TBAA0]]
; AVX2-NEXT: [[TMP6:%.*]] = bitcast i32* [[T5]] to <4 x i32>*
; AVX2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4, [[TBAA0]]
; AVX2-NEXT: store i32 [[T24]], i32* [[T21]], align 4, [[TBAA0]]
; AVX2-NEXT: store i32 [[T28]], i32* [[T25]], align 4, [[TBAA0]]
; AVX2-NEXT: store i32 [[T32]], i32* [[T29]], align 4, [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_4(
; AVX512-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[T1:%.*]], i32 0
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x i32*> [[TMP2]], <4 x i64> <i64 11, i64 4, i64 15, i64 18>
; AVX512-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
; AVX512-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
; AVX512-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
; AVX512-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
; AVX512-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
; AVX512-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
; AVX512-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), [[TBAA0]]
; AVX512-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, [[TBAA0]]
; AVX512-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, [[TBAA0]]
; AVX512-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, [[TBAA0]]
; AVX512-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
; AVX512-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 2, i32 3, i32 4, i32 1>
; AVX512-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
; AVX512-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
; AVX512-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
; AVX512-NEXT: store i32 [[T4]], i32* [[T0]], align 4, [[TBAA0]]
; AVX512-NEXT: [[TMP6:%.*]] = bitcast i32* [[T5]] to <4 x i32>*
; AVX512-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4, [[TBAA0]]
; AVX512-NEXT: store i32 [[T24]], i32* [[T21]], align 4, [[TBAA0]]
; AVX512-NEXT: store i32 [[T28]], i32* [[T25]], align 4, [[TBAA0]]
; AVX512-NEXT: store i32 [[T32]], i32* [[T29]], align 4, [[TBAA0]]
; AVX512-NEXT: ret void
;
%t5 = getelementptr inbounds i32, i32* %t0, i64 1
%t6 = getelementptr inbounds i32, i32* %t1, i64 11
%t9 = getelementptr inbounds i32, i32* %t0, i64 2
%t10 = getelementptr inbounds i32, i32* %t1, i64 4
%t13 = getelementptr inbounds i32, i32* %t0, i64 3
%t14 = getelementptr inbounds i32, i32* %t1, i64 15
%t17 = getelementptr inbounds i32, i32* %t0, i64 4
%t18 = getelementptr inbounds i32, i32* %t1, i64 18
%t21 = getelementptr inbounds i32, i32* %t0, i64 5
%t22 = getelementptr inbounds i32, i32* %t1, i64 9
%t25 = getelementptr inbounds i32, i32* %t0, i64 6
%t26 = getelementptr inbounds i32, i32* %t1, i64 6
%t29 = getelementptr inbounds i32, i32* %t0, i64 7
%t30 = getelementptr inbounds i32, i32* %t1, i64 21
%t3 = load i32, i32* %t1, align 4, !tbaa !2
%t7 = load i32, i32* %t6, align 4, !tbaa !2
%t11 = load i32, i32* %t10, align 4, !tbaa !2
%t15 = load i32, i32* %t14, align 4, !tbaa !2
%t19 = load i32, i32* %t18, align 4, !tbaa !2
%t23 = load i32, i32* %t22, align 4, !tbaa !2
%t27 = load i32, i32* %t26, align 4, !tbaa !2
%t31 = load i32, i32* %t30, align 4, !tbaa !2
%t4 = add i32 %t3, 1
%t8 = add i32 %t7, 2
%t12 = add i32 %t11, 3
%t16 = add i32 %t15, 4
%t20 = add i32 %t19, 1
%t24 = add i32 %t23, 2
%t28 = add i32 %t27, 3
%t32 = add i32 %t31, 4
store i32 %t4, i32* %t0, align 4, !tbaa !2
store i32 %t8, i32* %t5, align 4, !tbaa !2
store i32 %t12, i32* %t9, align 4, !tbaa !2
store i32 %t16, i32* %t13, align 4, !tbaa !2
store i32 %t20, i32* %t17, align 4, !tbaa !2
store i32 %t24, i32* %t21, align 4, !tbaa !2
store i32 %t28, i32* %t25, align 4, !tbaa !2
store i32 %t32, i32* %t29, align 4, !tbaa !2
ret void
}
; gather_load_div: eight independent scalar lanes, each computing
;   %0[i] = %1[a_i] / %1[b_i]        for i = 0..7
; with numerator offsets   a = 0, 10, 3, 14, 17, 8, 5, 20
; and denominator offsets  b = 4, 13, 11, 44, 33, 30, 27, 23.
; The sources are non-consecutive elements of %1, while the destinations are
; the consecutive elements %0[0..7].
;
; The autogenerated assertions below expect the scalar loads to be replaced by
; llvm.masked.gather calls feeding a vector fdiv and a vector store: two
; <4 x float> halves under the SSE prefix, and a single <8 x float> sequence
; under the AVX, AVX2 and AVX512 prefixes.
; NOTE(review): the RUN lines are outside this excerpt, so which pass and
; which target settings map to each prefix should be confirmed there.
define void @gather_load_div(float* noalias nocapture %0, float* noalias nocapture readonly %1) {
; SSE-LABEL: @gather_load_div(
; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float*> undef, float* [[TMP1]], i32 0
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> [[TMP6]], float* [[TMP3]], i32 1
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x float*> [[TMP7]], float* [[TMP4]], i32 2
; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x float*> [[TMP8]], float* [[TMP5]], i32 3
; SSE-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP9]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef), [[TBAA0]]
; SSE-NEXT: [[TMP11:%.*]] = shufflevector <4 x float*> [[TMP6]], <4 x float*> undef, <4 x i32> zeroinitializer
; SSE-NEXT: [[TMP12:%.*]] = getelementptr float, <4 x float*> [[TMP11]], <4 x i64> <i64 4, i64 13, i64 11, i64 44>
; SSE-NEXT: [[TMP13:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP12]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef), [[TBAA0]]
; SSE-NEXT: [[TMP14:%.*]] = fdiv <4 x float> [[TMP10]], [[TMP13]]
; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 4
; SSE-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
; SSE-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 4, [[TBAA0]]
; SSE-NEXT: [[TMP17:%.*]] = getelementptr float, <4 x float*> [[TMP11]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
; SSE-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP17]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef), [[TBAA0]]
; SSE-NEXT: [[TMP19:%.*]] = getelementptr float, <4 x float*> [[TMP11]], <4 x i64> <i64 33, i64 30, i64 27, i64 23>
; SSE-NEXT: [[TMP20:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP19]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef), [[TBAA0]]
; SSE-NEXT: [[TMP21:%.*]] = fdiv <4 x float> [[TMP18]], [[TMP20]]
; SSE-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP15]] to <4 x float>*
; SSE-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[TMP22]], align 4, [[TBAA0]]
; SSE-NEXT: ret void
;
; AVX-LABEL: @gather_load_div(
; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_div(
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX2-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX2-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX2-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX2-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX2-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX2-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX2-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX2-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_div(
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> undef, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX512-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX512-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX512-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX512-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX512-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX512-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX512-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX512-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), [[TBAA0]]
; AVX512-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX512-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, [[TBAA0]]
; AVX512-NEXT: ret void
;
; Scalar input. Each lane's group also computes the destination pointer that
; the following lane's store uses, so the order of instructions is interleaved.
; Lane 0 -- %0[0] = %1[0] / %1[4]
%3 = load float, float* %1, align 4, !tbaa !2
%4 = getelementptr inbounds float, float* %1, i64 4
%5 = load float, float* %4, align 4, !tbaa !2
%6 = fdiv float %3, %5
%7 = getelementptr inbounds float, float* %0, i64 1
store float %6, float* %0, align 4, !tbaa !2
; Lane 1 -- %0[1] = %1[10] / %1[13]
%8 = getelementptr inbounds float, float* %1, i64 10
%9 = load float, float* %8, align 4, !tbaa !2
%10 = getelementptr inbounds float, float* %1, i64 13
%11 = load float, float* %10, align 4, !tbaa !2
%12 = fdiv float %9, %11
%13 = getelementptr inbounds float, float* %0, i64 2
store float %12, float* %7, align 4, !tbaa !2
; Lane 2 -- %0[2] = %1[3] / %1[11]
%14 = getelementptr inbounds float, float* %1, i64 3
%15 = load float, float* %14, align 4, !tbaa !2
%16 = getelementptr inbounds float, float* %1, i64 11
%17 = load float, float* %16, align 4, !tbaa !2
%18 = fdiv float %15, %17
%19 = getelementptr inbounds float, float* %0, i64 3
store float %18, float* %13, align 4, !tbaa !2
; Lane 3 -- %0[3] = %1[14] / %1[44]
%20 = getelementptr inbounds float, float* %1, i64 14
%21 = load float, float* %20, align 4, !tbaa !2
%22 = getelementptr inbounds float, float* %1, i64 44
%23 = load float, float* %22, align 4, !tbaa !2
%24 = fdiv float %21, %23
%25 = getelementptr inbounds float, float* %0, i64 4
store float %24, float* %19, align 4, !tbaa !2
; Lane 4 -- %0[4] = %1[17] / %1[33]
%26 = getelementptr inbounds float, float* %1, i64 17
%27 = load float, float* %26, align 4, !tbaa !2
%28 = getelementptr inbounds float, float* %1, i64 33
%29 = load float, float* %28, align 4, !tbaa !2
%30 = fdiv float %27, %29
%31 = getelementptr inbounds float, float* %0, i64 5
store float %30, float* %25, align 4, !tbaa !2
; Lane 5 -- %0[5] = %1[8] / %1[30]
%32 = getelementptr inbounds float, float* %1, i64 8
%33 = load float, float* %32, align 4, !tbaa !2
%34 = getelementptr inbounds float, float* %1, i64 30
%35 = load float, float* %34, align 4, !tbaa !2
%36 = fdiv float %33, %35
%37 = getelementptr inbounds float, float* %0, i64 6
store float %36, float* %31, align 4, !tbaa !2
; Lane 6 -- %0[6] = %1[5] / %1[27]
%38 = getelementptr inbounds float, float* %1, i64 5
%39 = load float, float* %38, align 4, !tbaa !2
%40 = getelementptr inbounds float, float* %1, i64 27
%41 = load float, float* %40, align 4, !tbaa !2
%42 = fdiv float %39, %41
%43 = getelementptr inbounds float, float* %0, i64 7
store float %42, float* %37, align 4, !tbaa !2
; Lane 7 -- %0[7] = %1[20] / %1[23]
%44 = getelementptr inbounds float, float* %1, i64 20
%45 = load float, float* %44, align 4, !tbaa !2
%46 = getelementptr inbounds float, float* %1, i64 23
%47 = load float, float* %46, align 4, !tbaa !2
%48 = fdiv float %45, %47
store float %48, float* %43, align 4, !tbaa !2
ret void
}
; TBAA metadata referenced by the `!tbaa !2` attachments on the loads and
; stores above. !2 is the access tag (base type !3, access type !3, offset 0);
; !3 is the scalar type node "short", whose parent is !4 ("omnipotent char"),
; which in turn hangs off the TBAA root !5.
; NOTE(review): the type node is named "short" although the visible accesses
; are i32 and float -- presumably inherited from the original reproducer;
; confirm before relying on the name.
!2 = !{!3, !3, i64 0}
!3 = !{!"short", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More