[IR] Let IRBuilder's CreateVectorSplat/CreateShuffleVector use poison as placeholder

This patch updates IRBuilder to create insertelement/shufflevector using poison as a placeholder.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93793
This commit is contained in:
Juneyoung Lee 2020-12-24 09:33:58 +09:00
parent 6df161a2fb
commit 278aa65cc4
158 changed files with 3792 additions and 2965 deletions

View File

@ -78,14 +78,14 @@ void test_core(void) {
// CHECK-ASM: vst
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vd = vec_splats(d);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vd = vec_mergeh(vd, vd);

View File

@ -729,79 +729,79 @@ void test_core(void) {
// CHECK: <2 x i64> <i64 -4503582447501313, i64 -4503582447501313>
vsc = vec_splat(vsc, 0);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vsc = vec_splat(vsc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vuc = vec_splat(vuc, 0);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vuc = vec_splat(vuc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vbc = vec_splat(vbc, 0);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vbc = vec_splat(vbc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vss = vec_splat(vss, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vss = vec_splat(vss, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vus = vec_splat(vus, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vus = vec_splat(vus, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vbs = vec_splat(vbs, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vbs = vec_splat(vbs, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vsi = vec_splat(vsi, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vsi = vec_splat(vsi, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vui = vec_splat(vui, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vui = vec_splat(vui, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vbi = vec_splat(vbi, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vbi = vec_splat(vbi, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vsl = vec_splat(vsl, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vsl = vec_splat(vsl, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vul = vec_splat(vul, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vul = vec_splat(vul, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vbl = vec_splat(vbl, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vbl = vec_splat(vbl, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@ -841,31 +841,31 @@ void test_core(void) {
// CHECK: <2 x i64> <i64 32767, i64 32767>
vsc = vec_splats(sc);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vlrepb
vuc = vec_splats(uc);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vlrepb
vss = vec_splats(ss);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vlreph
vus = vec_splats(us);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vlreph
vsi = vec_splats(si);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vlrepf
vui = vec_splats(ui);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vlrepf
vsl = vec_splats(sl);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vul = vec_splats(ul);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vd = vec_splats(d);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vsl = vec_extend_s64(vsc);

View File

@ -130,23 +130,23 @@ void test_core(void) {
// CHECK-ASM: vst
vf = vec_splat(vf, 0);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vf = vec_splat(vf, 1);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-ASM: vrepf
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vf = vec_splats(f);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vlrepf
vd = vec_splats(d);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vf = vec_mergeh(vf, vf);

View File

@ -235,23 +235,23 @@ void test_core(void) {
// CHECK-ASM: vstrlr
vf = vec_splat(vf, 0);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vf = vec_splat(vf, 1);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-ASM: vrepf
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vf = vec_splats(f);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vlrepf
vd = vec_splats(d);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
vf = vec_mergeh(vf, vf);

View File

@ -1924,8 +1924,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <16 x i8> [[SHL1]], <16 x i8>* @sc, align 8
// CHECK: [[TMP4:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8
// CHECK: [[TMP5:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
// CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]]
// CHECK: store volatile <16 x i8> [[SHL2]], <16 x i8>* @sc, align 8
@ -1942,8 +1942,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <16 x i8> [[SHL5]], <16 x i8>* @uc, align 8
// CHECK: [[TMP11:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8
// CHECK: [[TMP12:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP12]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
// CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]]
// CHECK: store volatile <16 x i8> [[SHL9]], <16 x i8>* @uc, align 8
@ -1960,8 +1960,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <8 x i16> [[SHL12]], <8 x i16>* @ss, align 8
// CHECK: [[TMP18:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8
// CHECK: [[TMP19:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
// CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]]
// CHECK: store volatile <8 x i16> [[SHL16]], <8 x i16>* @ss, align 8
@ -1978,8 +1978,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <8 x i16> [[SHL19]], <8 x i16>* @us, align 8
// CHECK: [[TMP25:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8
// CHECK: [[TMP26:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP26]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
// CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]]
// CHECK: store volatile <8 x i16> [[SHL23]], <8 x i16>* @us, align 8
@ -1996,8 +1996,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <4 x i32> [[SHL26]], <4 x i32>* @si, align 8
// CHECK: [[TMP32:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
// CHECK: [[TMP33:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP33]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
// CHECK: store volatile <4 x i32> [[SHL29]], <4 x i32>* @si, align 8
// CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
@ -2013,8 +2013,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <4 x i32> [[SHL32]], <4 x i32>* @ui, align 8
// CHECK: [[TMP39:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
// CHECK: [[TMP40:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP40]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
// CHECK: store volatile <4 x i32> [[SHL35]], <4 x i32>* @ui, align 8
// CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
@ -2030,8 +2030,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <2 x i64> [[SHL38]], <2 x i64>* @sl, align 8
// CHECK: [[TMP46:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8
// CHECK: [[TMP47:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
// CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]]
// CHECK: store volatile <2 x i64> [[SHL42]], <2 x i64>* @sl, align 8
@ -2048,8 +2048,8 @@ void test_xor_assign(void) {
// CHECK: store volatile <2 x i64> [[SHL45]], <2 x i64>* @ul, align 8
// CHECK: [[TMP53:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8
// CHECK: [[TMP54:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP54]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
// CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]]
// CHECK: store volatile <2 x i64> [[SHL49]], <2 x i64>* @ul, align 8
@ -2106,8 +2106,8 @@ void test_sl(void) {
// CHECK: [[SHL1:%.*]] = shl <16 x i8> [[TMP3]], [[TMP2]]
// CHECK: store volatile <16 x i8> [[SHL1]], <16 x i8>* @sc, align 8
// CHECK: [[TMP4:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP4]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8
// CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
// CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP5]], [[SH_PROM]]
@ -2124,8 +2124,8 @@ void test_sl(void) {
// CHECK: [[SHL5:%.*]] = shl <16 x i8> [[TMP10]], [[TMP9]]
// CHECK: store volatile <16 x i8> [[SHL5]], <16 x i8>* @uc, align 8
// CHECK: [[TMP11:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP11]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[TMP12:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8
// CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
// CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP12]], [[SH_PROM8]]
@ -2142,8 +2142,8 @@ void test_sl(void) {
// CHECK: [[SHL12:%.*]] = shl <8 x i16> [[TMP17]], [[TMP16]]
// CHECK: store volatile <8 x i16> [[SHL12]], <8 x i16>* @ss, align 8
// CHECK: [[TMP18:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP18]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[TMP19:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8
// CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
// CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP19]], [[SH_PROM15]]
@ -2160,8 +2160,8 @@ void test_sl(void) {
// CHECK: [[SHL19:%.*]] = shl <8 x i16> [[TMP24]], [[TMP23]]
// CHECK: store volatile <8 x i16> [[SHL19]], <8 x i16>* @us, align 8
// CHECK: [[TMP25:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP25]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[TMP26:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8
// CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
// CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP26]], [[SH_PROM22]]
@ -2178,8 +2178,8 @@ void test_sl(void) {
// CHECK: [[SHL26:%.*]] = shl <4 x i32> [[TMP31]], [[TMP30]]
// CHECK: store volatile <4 x i32> [[SHL26]], <4 x i32>* @si, align 8
// CHECK: [[TMP32:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP32]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[TMP33:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
// CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
// CHECK: store volatile <4 x i32> [[SHL29]], <4 x i32>* @si, align 8
@ -2195,8 +2195,8 @@ void test_sl(void) {
// CHECK: [[SHL32:%.*]] = shl <4 x i32> [[TMP38]], [[TMP37]]
// CHECK: store volatile <4 x i32> [[SHL32]], <4 x i32>* @ui, align 8
// CHECK: [[TMP39:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[TMP40:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
// CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
// CHECK: store volatile <4 x i32> [[SHL35]], <4 x i32>* @ui, align 8
@ -2212,8 +2212,8 @@ void test_sl(void) {
// CHECK: [[SHL38:%.*]] = shl <2 x i64> [[TMP45]], [[TMP44]]
// CHECK: store volatile <2 x i64> [[SHL38]], <2 x i64>* @sl, align 8
// CHECK: [[TMP46:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP46]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[TMP47:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8
// CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
// CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP47]], [[SH_PROM41]]
@ -2230,8 +2230,8 @@ void test_sl(void) {
// CHECK: [[SHL45:%.*]] = shl <2 x i64> [[TMP52]], [[TMP51]]
// CHECK: store volatile <2 x i64> [[SHL45]], <2 x i64>* @ul, align 8
// CHECK: [[TMP53:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP53]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[TMP54:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8
// CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
// CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP54]], [[SH_PROM48]]
@ -2290,8 +2290,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <16 x i8> [[SHR1]], <16 x i8>* @sc, align 8
// CHECK: [[TMP4:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8
// CHECK: [[TMP5:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
// CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]]
// CHECK: store volatile <16 x i8> [[SHR2]], <16 x i8>* @sc, align 8
@ -2308,8 +2308,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <16 x i8> [[SHR5]], <16 x i8>* @uc, align 8
// CHECK: [[TMP11:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8
// CHECK: [[TMP12:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP12]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
// CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]]
// CHECK: store volatile <16 x i8> [[SHR9]], <16 x i8>* @uc, align 8
@ -2326,8 +2326,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <8 x i16> [[SHR12]], <8 x i16>* @ss, align 8
// CHECK: [[TMP18:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8
// CHECK: [[TMP19:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
// CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]]
// CHECK: store volatile <8 x i16> [[SHR16]], <8 x i16>* @ss, align 8
@ -2344,8 +2344,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <8 x i16> [[SHR19]], <8 x i16>* @us, align 8
// CHECK: [[TMP25:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8
// CHECK: [[TMP26:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP26]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
// CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]]
// CHECK: store volatile <8 x i16> [[SHR23]], <8 x i16>* @us, align 8
@ -2362,8 +2362,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <4 x i32> [[SHR26]], <4 x i32>* @si, align 8
// CHECK: [[TMP32:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
// CHECK: [[TMP33:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP33]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
// CHECK: store volatile <4 x i32> [[SHR29]], <4 x i32>* @si, align 8
// CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
@ -2379,8 +2379,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <4 x i32> [[SHR32]], <4 x i32>* @ui, align 8
// CHECK: [[TMP39:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
// CHECK: [[TMP40:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP40]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
// CHECK: store volatile <4 x i32> [[SHR35]], <4 x i32>* @ui, align 8
// CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
@ -2396,8 +2396,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <2 x i64> [[SHR38]], <2 x i64>* @sl, align 8
// CHECK: [[TMP46:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8
// CHECK: [[TMP47:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
// CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]]
// CHECK: store volatile <2 x i64> [[SHR42]], <2 x i64>* @sl, align 8
@ -2414,8 +2414,8 @@ void test_sl_assign(void) {
// CHECK: store volatile <2 x i64> [[SHR45]], <2 x i64>* @ul, align 8
// CHECK: [[TMP53:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8
// CHECK: [[TMP54:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP54]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
// CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]]
// CHECK: store volatile <2 x i64> [[SHR49]], <2 x i64>* @ul, align 8
@ -2472,8 +2472,8 @@ void test_sr(void) {
// CHECK: [[SHR1:%.*]] = ashr <16 x i8> [[TMP3]], [[TMP2]]
// CHECK: store volatile <16 x i8> [[SHR1]], <16 x i8>* @sc, align 8
// CHECK: [[TMP4:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP4]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0
// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8
// CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
// CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP5]], [[SH_PROM]]
@ -2490,8 +2490,8 @@ void test_sr(void) {
// CHECK: [[SHR5:%.*]] = lshr <16 x i8> [[TMP10]], [[TMP9]]
// CHECK: store volatile <16 x i8> [[SHR5]], <16 x i8>* @uc, align 8
// CHECK: [[TMP11:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP11]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0
// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
// CHECK: [[TMP12:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8
// CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
// CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP12]], [[SH_PROM8]]
@ -2508,8 +2508,8 @@ void test_sr(void) {
// CHECK: [[SHR12:%.*]] = ashr <8 x i16> [[TMP17]], [[TMP16]]
// CHECK: store volatile <8 x i16> [[SHR12]], <8 x i16>* @ss, align 8
// CHECK: [[TMP18:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP18]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0
// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[TMP19:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8
// CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
// CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP19]], [[SH_PROM15]]
@ -2526,8 +2526,8 @@ void test_sr(void) {
// CHECK: [[SHR19:%.*]] = lshr <8 x i16> [[TMP24]], [[TMP23]]
// CHECK: store volatile <8 x i16> [[SHR19]], <8 x i16>* @us, align 8
// CHECK: [[TMP25:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP25]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0
// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[TMP26:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8
// CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
// CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP26]], [[SH_PROM22]]
@ -2544,8 +2544,8 @@ void test_sr(void) {
// CHECK: [[SHR26:%.*]] = ashr <4 x i32> [[TMP31]], [[TMP30]]
// CHECK: store volatile <4 x i32> [[SHR26]], <4 x i32>* @si, align 8
// CHECK: [[TMP32:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP32]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0
// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[TMP33:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8
// CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
// CHECK: store volatile <4 x i32> [[SHR29]], <4 x i32>* @si, align 8
@ -2561,8 +2561,8 @@ void test_sr(void) {
// CHECK: [[SHR32:%.*]] = lshr <4 x i32> [[TMP38]], [[TMP37]]
// CHECK: store volatile <4 x i32> [[SHR32]], <4 x i32>* @ui, align 8
// CHECK: [[TMP39:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0
// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: [[TMP40:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8
// CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
// CHECK: store volatile <4 x i32> [[SHR35]], <4 x i32>* @ui, align 8
@ -2578,8 +2578,8 @@ void test_sr(void) {
// CHECK: [[SHR38:%.*]] = ashr <2 x i64> [[TMP45]], [[TMP44]]
// CHECK: store volatile <2 x i64> [[SHR38]], <2 x i64>* @sl, align 8
// CHECK: [[TMP46:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP46]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0
// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[TMP47:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8
// CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
// CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP47]], [[SH_PROM41]]
@ -2596,8 +2596,8 @@ void test_sr(void) {
// CHECK: [[SHR45:%.*]] = lshr <2 x i64> [[TMP52]], [[TMP51]]
// CHECK: store volatile <2 x i64> [[SHR45]], <2 x i64>* @ul, align 8
// CHECK: [[TMP53:%.*]] = load volatile i32, i32* @cnt, align 4
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP53]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0
// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: [[TMP54:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8
// CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
// CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP54]], [[SH_PROM48]]

View File

@ -134,8 +134,8 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpeqq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -152,8 +152,8 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpeqq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -170,8 +170,8 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpeqq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -188,8 +188,8 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpeqq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -206,8 +206,8 @@ mve_pred16_t test_vcmpeqq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpeqq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -224,8 +224,8 @@ mve_pred16_t test_vcmpeqq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vcmpeqq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -242,8 +242,8 @@ mve_pred16_t test_vcmpeqq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vcmpeqq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -260,8 +260,8 @@ mve_pred16_t test_vcmpeqq_n_u16(uint16x8_t a, uint16_t b)
// CHECK-LABEL: @test_vcmpeqq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -432,8 +432,8 @@ mve_pred16_t test_vcmpeqq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -453,8 +453,8 @@ mve_pred16_t test_vcmpeqq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -474,8 +474,8 @@ mve_pred16_t test_vcmpeqq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -495,8 +495,8 @@ mve_pred16_t test_vcmpeqq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -516,8 +516,8 @@ mve_pred16_t test_vcmpeqq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -537,8 +537,8 @@ mve_pred16_t test_vcmpeqq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -558,8 +558,8 @@ mve_pred16_t test_vcmpeqq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -579,8 +579,8 @@ mve_pred16_t test_vcmpeqq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -726,8 +726,8 @@ mve_pred16_t test_vcmpneq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpneq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -744,8 +744,8 @@ mve_pred16_t test_vcmpneq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpneq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -762,8 +762,8 @@ mve_pred16_t test_vcmpneq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpneq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -780,8 +780,8 @@ mve_pred16_t test_vcmpneq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpneq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -798,8 +798,8 @@ mve_pred16_t test_vcmpneq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpneq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -816,8 +816,8 @@ mve_pred16_t test_vcmpneq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vcmpneq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -834,8 +834,8 @@ mve_pred16_t test_vcmpneq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vcmpneq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -852,8 +852,8 @@ mve_pred16_t test_vcmpneq_n_u16(uint16x8_t a, uint16_t b)
// CHECK-LABEL: @test_vcmpneq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1024,8 +1024,8 @@ mve_pred16_t test_vcmpneq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1045,8 +1045,8 @@ mve_pred16_t test_vcmpneq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1066,8 +1066,8 @@ mve_pred16_t test_vcmpneq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -1087,8 +1087,8 @@ mve_pred16_t test_vcmpneq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1108,8 +1108,8 @@ mve_pred16_t test_vcmpneq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1129,8 +1129,8 @@ mve_pred16_t test_vcmpneq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -1150,8 +1150,8 @@ mve_pred16_t test_vcmpneq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1171,8 +1171,8 @@ mve_pred16_t test_vcmpneq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1318,8 +1318,8 @@ mve_pred16_t test_vcmpcsq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpgeq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1336,8 +1336,8 @@ mve_pred16_t test_vcmpgeq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpgeq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1354,8 +1354,8 @@ mve_pred16_t test_vcmpgeq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpgeq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1372,8 +1372,8 @@ mve_pred16_t test_vcmpgeq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpgeq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1390,8 +1390,8 @@ mve_pred16_t test_vcmpgeq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpgeq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1408,8 +1408,8 @@ mve_pred16_t test_vcmpgeq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vcmpcsq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1426,8 +1426,8 @@ mve_pred16_t test_vcmpcsq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vcmpcsq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1444,8 +1444,8 @@ mve_pred16_t test_vcmpcsq_n_u16(uint16x8_t a, uint16_t b)
// CHECK-LABEL: @test_vcmpcsq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1616,8 +1616,8 @@ mve_pred16_t test_vcmpcsq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1637,8 +1637,8 @@ mve_pred16_t test_vcmpgeq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1658,8 +1658,8 @@ mve_pred16_t test_vcmpgeq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -1679,8 +1679,8 @@ mve_pred16_t test_vcmpgeq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1700,8 +1700,8 @@ mve_pred16_t test_vcmpgeq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1721,8 +1721,8 @@ mve_pred16_t test_vcmpgeq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -1742,8 +1742,8 @@ mve_pred16_t test_vcmpcsq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -1763,8 +1763,8 @@ mve_pred16_t test_vcmpcsq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -1910,8 +1910,8 @@ mve_pred16_t test_vcmphiq_u32(uint32x4_t a, uint32x4_t b)
// CHECK-LABEL: @test_vcmpgtq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1928,8 +1928,8 @@ mve_pred16_t test_vcmpgtq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpgtq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1946,8 +1946,8 @@ mve_pred16_t test_vcmpgtq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpgtq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1964,8 +1964,8 @@ mve_pred16_t test_vcmpgtq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpgtq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -1982,8 +1982,8 @@ mve_pred16_t test_vcmpgtq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpgtq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2000,8 +2000,8 @@ mve_pred16_t test_vcmpgtq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vcmphiq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2018,8 +2018,8 @@ mve_pred16_t test_vcmphiq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vcmphiq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2036,8 +2036,8 @@ mve_pred16_t test_vcmphiq_n_u16(uint16x8_t a, uint16_t b)
// CHECK-LABEL: @test_vcmphiq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2208,8 +2208,8 @@ mve_pred16_t test_vcmphiq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -2229,8 +2229,8 @@ mve_pred16_t test_vcmpgtq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -2250,8 +2250,8 @@ mve_pred16_t test_vcmpgtq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -2271,8 +2271,8 @@ mve_pred16_t test_vcmpgtq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -2292,8 +2292,8 @@ mve_pred16_t test_vcmpgtq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -2313,8 +2313,8 @@ mve_pred16_t test_vcmpgtq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -2334,8 +2334,8 @@ mve_pred16_t test_vcmphiq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -2355,8 +2355,8 @@ mve_pred16_t test_vcmphiq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -2454,8 +2454,8 @@ mve_pred16_t test_vcmpleq_s32(int32x4_t a, int32x4_t b)
// CHECK-LABEL: @test_vcmpleq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2472,8 +2472,8 @@ mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpleq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2490,8 +2490,8 @@ mve_pred16_t test_vcmpleq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpleq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2508,8 +2508,8 @@ mve_pred16_t test_vcmpleq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpleq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2526,8 +2526,8 @@ mve_pred16_t test_vcmpleq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpleq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2641,8 +2641,8 @@ mve_pred16_t test_vcmpleq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -2662,8 +2662,8 @@ mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -2683,8 +2683,8 @@ mve_pred16_t test_vcmpleq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -2704,8 +2704,8 @@ mve_pred16_t test_vcmpleq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -2725,8 +2725,8 @@ mve_pred16_t test_vcmpleq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -2824,8 +2824,8 @@ mve_pred16_t test_vcmpltq_s32(int32x4_t a, int32x4_t b)
// CHECK-LABEL: @test_vcmpltq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2842,8 +2842,8 @@ mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vcmpltq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2860,8 +2860,8 @@ mve_pred16_t test_vcmpltq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vcmpltq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2878,8 +2878,8 @@ mve_pred16_t test_vcmpltq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vcmpltq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -2896,8 +2896,8 @@ mve_pred16_t test_vcmpltq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vcmpltq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
@ -3011,8 +3011,8 @@ mve_pred16_t test_vcmpltq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -3032,8 +3032,8 @@ mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
@ -3053,8 +3053,8 @@ mve_pred16_t test_vcmpltq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]])
@ -3074,8 +3074,8 @@ mve_pred16_t test_vcmpltq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
@ -3095,8 +3095,8 @@ mve_pred16_t test_vcmpltq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])

View File

@ -78,8 +78,8 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
// CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16

View File

@ -6,8 +6,8 @@
// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: ret <8 x half> [[DOTSPLAT]]
//
float16x8_t test_vdupq_n_f16(float16_t a)
@ -17,8 +17,8 @@ float16x8_t test_vdupq_n_f16(float16_t a)
// CHECK-LABEL: @test_vdupq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x float> [[DOTSPLAT]]
//
float32x4_t test_vdupq_n_f32(float32_t a)
@ -28,8 +28,8 @@ float32x4_t test_vdupq_n_f32(float32_t a)
// CHECK-LABEL: @test_vdupq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]]
//
int8x16_t test_vdupq_n_s8(int8_t a)
@ -39,8 +39,8 @@ int8x16_t test_vdupq_n_s8(int8_t a)
// CHECK-LABEL: @test_vdupq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]]
//
int16x8_t test_vdupq_n_s16(int16_t a)
@ -50,8 +50,8 @@ int16x8_t test_vdupq_n_s16(int16_t a)
// CHECK-LABEL: @test_vdupq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]]
//
int32x4_t test_vdupq_n_s32(int32_t a)
@ -61,8 +61,8 @@ int32x4_t test_vdupq_n_s32(int32_t a)
// CHECK-LABEL: @test_vdupq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]]
//
uint8x16_t test_vdupq_n_u8(uint8_t a)
@ -72,8 +72,8 @@ uint8x16_t test_vdupq_n_u8(uint8_t a)
// CHECK-LABEL: @test_vdupq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]]
//
uint16x8_t test_vdupq_n_u16(uint16_t a)
@ -83,8 +83,8 @@ uint16x8_t test_vdupq_n_u16(uint16_t a)
// CHECK-LABEL: @test_vdupq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]]
//
uint32x4_t test_vdupq_n_u32(uint32_t a)
@ -96,8 +96,8 @@ uint32x4_t test_vdupq_n_u32(uint32_t a)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
@ -114,8 +114,8 @@ float16x8_t test_vdupq_m_n_f16(float16x8_t inactive, float16_t a, mve_pred16_t p
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
@ -132,8 +132,8 @@ float32x4_t test_vdupq_m_n_f32(float32x4_t inactive, float32_t a, mve_pred16_t p
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
@ -150,8 +150,8 @@ int8x16_t test_vdupq_m_n_s8(int8x16_t inactive, int8_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
@ -168,8 +168,8 @@ int16x8_t test_vdupq_m_n_s16(int16x8_t inactive, int16_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@ -186,8 +186,8 @@ int32x4_t test_vdupq_m_n_s32(int32x4_t inactive, int32_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
@ -204,8 +204,8 @@ uint8x16_t test_vdupq_m_n_u8(uint8x16_t inactive, uint8_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
@ -222,8 +222,8 @@ uint16x8_t test_vdupq_m_n_u16(uint16x8_t inactive, uint16_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@ -240,8 +240,8 @@ uint32x4_t test_vdupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> undef
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
@ -254,8 +254,8 @@ float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> undef
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
@ -268,8 +268,8 @@ float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
@ -282,8 +282,8 @@ int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
@ -296,8 +296,8 @@ int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@ -310,8 +310,8 @@ int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
@ -324,8 +324,8 @@ uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
@ -338,8 +338,8 @@ uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//

View File

@ -32,8 +32,8 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
// CHECK-LABEL: @test_vfmaq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
@ -47,8 +47,8 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
// CHECK-LABEL: @test_vfmaq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
@ -62,8 +62,8 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
// CHECK-LABEL: @test_vfmasq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
@ -77,8 +77,8 @@ float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
// CHECK-LABEL: @test_vfmasq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
@ -120,8 +120,8 @@ float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
// CHECK-LABEL: @test_vmlaq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
@ -136,8 +136,8 @@ int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
// CHECK-LABEL: @test_vmlaq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
@ -152,8 +152,8 @@ int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
// CHECK-LABEL: @test_vmlaq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@ -168,8 +168,8 @@ int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
// CHECK-LABEL: @test_vmlaq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
@ -184,8 +184,8 @@ uint8x16_t test_vmlaq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
// CHECK-LABEL: @test_vmlaq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
@ -200,8 +200,8 @@ uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
// CHECK-LABEL: @test_vmlaq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@ -217,8 +217,8 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
// CHECK-LABEL: @test_vmlasq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
@ -233,8 +233,8 @@ int8x16_t test_vmlasq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
// CHECK-LABEL: @test_vmlasq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
@ -249,8 +249,8 @@ int16x8_t test_vmlasq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
// CHECK-LABEL: @test_vmlasq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@ -265,8 +265,8 @@ int32x4_t test_vmlasq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
// CHECK-LABEL: @test_vmlasq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
@ -281,8 +281,8 @@ uint8x16_t test_vmlasq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
// CHECK-LABEL: @test_vmlasq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
@ -297,8 +297,8 @@ uint16x8_t test_vmlasq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
// CHECK-LABEL: @test_vmlasq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@ -506,8 +506,8 @@ float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr
// CHECK-LABEL: @test_vfmaq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]])
@ -523,8 +523,8 @@ float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pr
// CHECK-LABEL: @test_vfmaq_m_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]])
@ -540,8 +540,8 @@ float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, mve_pr
// CHECK-LABEL: @test_vfmasq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]])
@ -557,8 +557,8 @@ float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_p
// CHECK-LABEL: @test_vfmasq_m_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]])

View File

@ -98,8 +98,8 @@ float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vaddq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -114,8 +114,8 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vaddq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
@ -130,8 +130,8 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vaddq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -148,8 +148,8 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
// CHECK-LABEL: @test_vaddq_m_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
@ -166,8 +166,8 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
// CHECK-LABEL: @test_vaddq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
@ -184,8 +184,8 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vaddq_x_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)

View File

@ -144,8 +144,8 @@ uint32x4_t test_vhaddq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vhaddq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -160,8 +160,8 @@ uint8x16_t test_vhaddq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vhaddq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -176,8 +176,8 @@ int16x8_t test_vhaddq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vhaddq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -192,8 +192,8 @@ uint32x4_t test_vhaddq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vhaddq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -210,8 +210,8 @@ int8x16_t test_vhaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
// CHECK-LABEL: @test_vhaddq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -228,8 +228,8 @@ uint16x8_t test_vhaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
// CHECK-LABEL: @test_vhaddq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
@ -246,8 +246,8 @@ int32x4_t test_vhaddq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr
// CHECK-LABEL: @test_vhaddq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
@ -264,8 +264,8 @@ uint8x16_t test_vhaddq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vhaddq_x_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
@ -282,8 +282,8 @@ int16x8_t test_vhaddq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vhaddq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)

View File

@ -96,8 +96,8 @@ int32x4_t test_vhsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
// CHECK-LABEL: @test_vhsubq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhsub.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -112,8 +112,8 @@ uint8x16_t test_vhsubq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vhsubq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -128,8 +128,8 @@ int16x8_t test_vhsubq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vhsubq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -144,8 +144,8 @@ uint32x4_t test_vhsubq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vhsubq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -162,8 +162,8 @@ int8x16_t test_vhsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
// CHECK-LABEL: @test_vhsubq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -180,8 +180,8 @@ uint16x8_t test_vhsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
// CHECK-LABEL: @test_vhsubq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
@ -198,8 +198,8 @@ int32x4_t test_vhsubq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr
// CHECK-LABEL: @test_vhsubq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
@ -216,8 +216,8 @@ uint8x16_t test_vhsubq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vhsubq_x_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
@ -234,8 +234,8 @@ int16x8_t test_vhsubq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vhsubq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)

View File

@ -190,8 +190,8 @@ float32x4_t test_vmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vmulq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -206,8 +206,8 @@ uint8x16_t test_vmulq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vmulq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -222,8 +222,8 @@ int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vmulq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -238,8 +238,8 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vmulq_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
@ -254,8 +254,8 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b)
// CHECK-LABEL: @test_vmulq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -272,8 +272,8 @@ int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
// CHECK-LABEL: @test_vmulq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -290,8 +290,8 @@ uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve
// CHECK-LABEL: @test_vmulq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
@ -308,8 +308,8 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre
// CHECK-LABEL: @test_vmulq_m_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
@ -326,8 +326,8 @@ float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b,
// CHECK-LABEL: @test_vmulq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef)
@ -344,8 +344,8 @@ uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vmulq_x_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
@ -361,8 +361,8 @@ int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
}
// CHECK-LABEL: @test_vmulq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef)
@ -379,8 +379,8 @@ uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vmulq_x_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef)

View File

@ -96,8 +96,8 @@ int32x4_t test_vqaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
// CHECK-LABEL: @test_vqaddq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -112,8 +112,8 @@ uint8x16_t test_vqaddq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vqaddq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -128,8 +128,8 @@ int16x8_t test_vqaddq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vqaddq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -144,8 +144,8 @@ uint32x4_t test_vqaddq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vqaddq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -162,8 +162,8 @@ int8x16_t test_vqaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
// CHECK-LABEL: @test_vqaddq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -180,8 +180,8 @@ uint16x8_t test_vqaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
// CHECK-LABEL: @test_vqaddq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])

View File

@ -96,8 +96,8 @@ int32x4_t test_vqdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_
// CHECK-LABEL: @test_vqdmulhq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -112,8 +112,8 @@ int8x16_t test_vqdmulhq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vqdmulhq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -128,8 +128,8 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vqdmulhq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -144,8 +144,8 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vqdmulhq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -162,8 +162,8 @@ int8x16_t test_vqdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pr
// CHECK-LABEL: @test_vqdmulhq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -180,8 +180,8 @@ int16x8_t test_vqdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve_
// CHECK-LABEL: @test_vqdmulhq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])

View File

@ -62,8 +62,8 @@ int64x2_t test_vqdmullbq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve
// CHECK-LABEL: @test_vqdmullbq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -77,8 +77,8 @@ int32x4_t test_vqdmullbq_n_s16(int16x8_t a, int16_t b) {
// CHECK-LABEL: @test_vqdmullbq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0)
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
@ -92,8 +92,8 @@ int64x2_t test_vqdmullbq_n_s32(int32x4_t a, int32_t b) {
// CHECK-LABEL: @test_vqdmullbq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
@ -109,8 +109,8 @@ int32x4_t test_vqdmullbq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve
// CHECK-LABEL: @test_vqdmullbq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <2 x i64> [[INACTIVE:%.*]])

View File

@ -62,8 +62,8 @@ int64x2_t test_vqdmulltq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve
// CHECK-LABEL: @test_vqdmulltq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -77,8 +77,8 @@ int32x4_t test_vqdmulltq_n_s16(int16x8_t a, int16_t b) {
// CHECK-LABEL: @test_vqdmulltq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
@ -92,8 +92,8 @@ int64x2_t test_vqdmulltq_n_s32(int32x4_t a, int32_t b) {
// CHECK-LABEL: @test_vqdmulltq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
@ -109,8 +109,8 @@ int32x4_t test_vqdmulltq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve
// CHECK-LABEL: @test_vqdmulltq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <2 x i64> [[INACTIVE:%.*]])

View File

@ -96,8 +96,8 @@ int32x4_t test_vqrdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve
// CHECK-LABEL: @test_vqrdmulhq_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -112,8 +112,8 @@ int8x16_t test_vqrdmulhq_n_s8(int8x16_t a, int8_t b)
// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -128,8 +128,8 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vqrdmulhq_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -144,8 +144,8 @@ int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b)
// CHECK-LABEL: @test_vqrdmulhq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qrdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -162,8 +162,8 @@ int8x16_t test_vqrdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_p
// CHECK-LABEL: @test_vqrdmulhq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qrdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -180,8 +180,8 @@ int16x8_t test_vqrdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve
// CHECK-LABEL: @test_vqrdmulhq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])

View File

@ -96,8 +96,8 @@ int32x4_t test_vqsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
// CHECK-LABEL: @test_vqsubq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
@ -112,8 +112,8 @@ uint8x16_t test_vqsubq_n_u8(uint8x16_t a, uint8_t b)
// CHECK-LABEL: @test_vqsubq_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
@ -128,8 +128,8 @@ int16x8_t test_vqsubq_n_s16(int16x8_t a, int16_t b)
// CHECK-LABEL: @test_vqsubq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -144,8 +144,8 @@ uint32x4_t test_vqsubq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vqsubq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -162,8 +162,8 @@ int8x16_t test_vqsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
// CHECK-LABEL: @test_vqsubq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
@ -180,8 +180,8 @@ uint16x8_t test_vqsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
// CHECK-LABEL: @test_vqsubq_m_n_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])

View File

@ -98,8 +98,8 @@ float16x8_t test_vsubq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vsubq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = sub <4 x i32> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
@ -114,8 +114,8 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b)
// CHECK-LABEL: @test_vsubq_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
@ -130,8 +130,8 @@ float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
// CHECK-LABEL: @test_vsubq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
@ -148,8 +148,8 @@ int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
// CHECK-LABEL: @test_vsubq_m_n_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
@ -166,8 +166,8 @@ float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
// CHECK-LABEL: @test_vsubq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
@ -184,8 +184,8 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
// CHECK-LABEL: @test_vsubq_x_n_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)

View File

@ -1359,12 +1359,12 @@ vector float test_vec_vec_splati_f(void) {
vector double test_vec_vec_splatid(void) {
// CHECK-BE: [[T1:%.+]] = fpext float %{{.+}} to double
// CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> undef, double [[T1:%.+]], i32 0
// CHECK-BE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> undef, <2 x i32> zeroinitialize
// CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0
// CHECK-BE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize
// CHECK-BE-NEXT: ret <2 x double> [[T3:%.+]]
// CHECK-LE: [[T1:%.+]] = fpext float %{{.+}} to double
// CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> undef, double [[T1:%.+]], i32 0
// CHECK-LE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> undef, <2 x i32> zeroinitialize
// CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0
// CHECK-LE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize
// CHECK-LE-NEXT: ret <2 x double> [[T3:%.+]]
return vec_splatid(1.0);
}

View File

@ -32,8 +32,8 @@ void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
// CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
// CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@ -44,8 +44,8 @@ void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
// CHECK-LABEL: define void @add_matrix_scalar_double_double(<25 x double> %a, double %vd)
// CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@ -56,8 +56,8 @@ void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
// CHECK-LABEL: define void @add_matrix_scalar_float_float(<6 x float> %b, float %vf)
// CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
// CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@ -69,8 +69,8 @@ void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
// CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
// CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
// CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@ -103,8 +103,8 @@ void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
// CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
// CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@ -116,8 +116,8 @@ void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
// CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
// CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@ -129,8 +129,8 @@ void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int
// CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
// CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@ -142,8 +142,8 @@ void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
// CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
// CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
// CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@ -154,8 +154,8 @@ void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
// CHECK-LABEL: define void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
// CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
// CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@ -166,8 +166,8 @@ void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned lo
// CHECK-LABEL: define void @add_matrix_scalar_long_long_int_unsigned_long_long
// CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
// CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
b = vulli + b;
@ -208,8 +208,8 @@ void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> undef, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> undef, <25 x i32> zeroinitializer
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
@ -221,8 +221,8 @@ void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
// CHECK-LABEL: @multiply_double_matrix_scalar_double(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> undef, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> undef, <25 x i32> zeroinitializer
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
@ -235,8 +235,8 @@ void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> undef, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> undef, <6 x i32> zeroinitializer
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
@ -249,8 +249,8 @@ void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
// CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> undef, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> undef, <27 x i32> zeroinitializer
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
@ -263,8 +263,8 @@ void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> undef, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> undef, <27 x i32> zeroinitializer
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void

View File

@ -70,36 +70,36 @@ void foo() {
vc8 = c << vc8;
// CHECK: [[t6:%.+]] = load i8, i8* @c,
// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> undef, i8 [[t6]], i32 0
// CHECK: [[splat_splat:%.+]] = shufflevector <8 x i8> [[splat_splatinsert]], <8 x i8> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> poison, i8 [[t6]], i32 0
// CHECK: [[splat_splat:%.+]] = shufflevector <8 x i8> [[splat_splatinsert]], <8 x i8> poison, <8 x i32> zeroinitializer
// CHECK: [[t7:%.+]] = load <8 x i8>, <8 x i8>* {{@.+}},
// CHECK: shl <8 x i8> [[splat_splat]], [[t7]]
vuc8 = i << vuc8;
// CHECK: [[t8:%.+]] = load i32, i32* @i,
// CHECK: [[tconv:%.+]] = trunc i32 [[t8]] to i8
// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> undef, i8 [[tconv]], i32 0
// CHECK: [[splat_splat8:%.+]] = shufflevector <8 x i8> [[splat_splatinsert7]], <8 x i8> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> poison, i8 [[tconv]], i32 0
// CHECK: [[splat_splat8:%.+]] = shufflevector <8 x i8> [[splat_splatinsert7]], <8 x i8> poison, <8 x i32> zeroinitializer
// CHECK: [[t9:%.+]] = load <8 x i8>, <8 x i8>* {{@.+}},
// CHECK: shl <8 x i8> [[splat_splat8]], [[t9]]
vi8 = uc << vi8;
// CHECK: [[t10:%.+]] = load i8, i8* @uc,
// CHECK: [[conv10:%.+]] = zext i8 [[t10]] to i32
// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> undef, i32 [[conv10]], i32 0
// CHECK: [[splat_splat12:%.+]] = shufflevector <8 x i32> [[splat_splatinsert11]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> poison, i32 [[conv10]], i32 0
// CHECK: [[splat_splat12:%.+]] = shufflevector <8 x i32> [[splat_splatinsert11]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t11:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}},
// CHECK: shl <8 x i32> [[splat_splat12]], [[t11]]
vui8 = us << vui8;
// CHECK: [[t12:%.+]] = load i16, i16* @us,
// CHECK: [[conv14:%.+]] = zext i16 [[t12]] to i32
// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> undef, i32 [[conv14]], i32 0
// CHECK: [[splat_splat16:%.+]] = shufflevector <8 x i32> [[splat_splatinsert15]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> poison, i32 [[conv14]], i32 0
// CHECK: [[splat_splat16:%.+]] = shufflevector <8 x i32> [[splat_splatinsert15]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t13:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}},
// CHECK: shl <8 x i32> [[splat_splat16]], [[t13]]
vs8 = ui << vs8;
// CHECK: [[t14:%.+]] = load i32, i32* @ui,
// CHECK: [[conv18:%.+]] = trunc i32 [[t14]] to i16
// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> undef, i16 [[conv18]], i32 0
// CHECK: [[splat_splat20:%.+]] = shufflevector <8 x i16> [[splat_splatinsert19]], <8 x i16> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> poison, i16 [[conv18]], i32 0
// CHECK: [[splat_splat20:%.+]] = shufflevector <8 x i16> [[splat_splatinsert19]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK: [[t15:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}},
// CHECK: shl <8 x i16> [[splat_splat20]], [[t15]]
vus8 = 1 << vus8;
@ -134,28 +134,28 @@ void foo() {
vui8 <<= s;
// CHECK: [[t27:%.+]] = load i16, i16* @s,
// CHECK: [[conv40:%.+]] = sext i16 [[t27]] to i32
// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> undef, i32 [[conv40]], i32 0
// CHECK: [[splat_splat42:%.+]] = shufflevector <8 x i32> [[splat_splatinsert41]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> poison, i32 [[conv40]], i32 0
// CHECK: [[splat_splat42:%.+]] = shufflevector <8 x i32> [[splat_splatinsert41]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t28:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}},
// CHECK: shl <8 x i32> [[t28]], [[splat_splat42]]
vi8 <<= us;
// CHECK: [[t29:%.+]] = load i16, i16* @us,
// CHECK: [[conv44:%.+]] = zext i16 [[t29]] to i32
// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> undef, i32 [[conv44]], i32 0
// CHECK: [[splat_splat46:%.+]] = shufflevector <8 x i32> [[splat_splatinsert45]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> poison, i32 [[conv44]], i32 0
// CHECK: [[splat_splat46:%.+]] = shufflevector <8 x i32> [[splat_splatinsert45]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t30:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}},
// CHECK: shl <8 x i32> [[t30]], [[splat_splat46]]
vus8 <<= i;
// CHECK: [[t31:%.+]] = load i32, i32* @i,
// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> undef, i32 [[t31]], i32 0
// CHECK: [[splat_splat49:%.+]] = shufflevector <8 x i32> [[splat_splatinsert48]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> poison, i32 [[t31]], i32 0
// CHECK: [[splat_splat49:%.+]] = shufflevector <8 x i32> [[splat_splatinsert48]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t32:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}},
// CHECK: [[sh_prom50:%.+]] = trunc <8 x i32> [[splat_splat49]] to <8 x i16>
// CHECK: shl <8 x i16> [[t32]], [[sh_prom50]]
vs8 <<= ui;
// CHECK: [[t33:%.+]] = load i32, i32* @ui,
// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> undef, i32 [[t33]], i32 0
// CHECK: [[splat_splat53:%.+]] = shufflevector <8 x i32> [[splat_splatinsert52]], <8 x i32> undef, <8 x i32> zeroinitializer
// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> poison, i32 [[t33]], i32 0
// CHECK: [[splat_splat53:%.+]] = shufflevector <8 x i32> [[splat_splatinsert52]], <8 x i32> poison, <8 x i32> zeroinitializer
// CHECK: [[t34:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}},
// CHECK: [[sh_prom54:%.+]] = trunc <8 x i32> [[splat_splat53]] to <8 x i16>
// CHECK: shl <8 x i16> [[t34]], [[sh_prom54]]

View File

@ -61,8 +61,8 @@ void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
// CHECK-LABEL: define void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
// CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -75,8 +75,8 @@ void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
// CHECK-LABEL: define void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
// CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -96,8 +96,8 @@ void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
// CHECK-LABEL: define void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8
// CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -110,8 +110,8 @@ void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
// CHECK-LABEL: define void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
// CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -132,8 +132,8 @@ void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
// CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
// CHECK: [[SCALAR_FP:%.*]] = sitofp i32 %call to double
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -147,8 +147,8 @@ void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
// CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
// CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
@ -188,8 +188,8 @@ void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
// CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{.*}})
// CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double
// CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
// CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
// CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
// CHECK: ret void

View File

@ -62,22 +62,22 @@ void TwoScalarOps() {
four_shorts ? some_short : some_short;
// CHECK: %[[COND:.+]] = load <4 x i16>
// CHECK: %[[LHS:.+]] = load i16
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load i16
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]]
four_shorts ? some_ushort : some_ushort;
// CHECK: %[[COND:.+]] = load <4 x i16>
// CHECK: %[[LHS:.+]] = load i16
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load i16
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]]
@ -85,12 +85,12 @@ void TwoScalarOps() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load i16
// CHECK: %[[LHS_ZEXT:.+]] = zext i16 %[[LHS]] to i32
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS_ZEXT]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_ZEXT]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load i16
// CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_SEXT]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]]
@ -98,35 +98,35 @@ void TwoScalarOps() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load i32
// CHECK: %[[LHS_CONV:.+]] = sitofp i32 %[[LHS]] to float
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[LHS_CONV]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x float> %[[LHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[LHS_CONV]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x float> %[[LHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load float
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS_SPLAT]], <4 x float> %[[RHS_SPLAT]]
four_ll ? some_double : some_ll;
// CHECK: %[[COND:.+]] = load <4 x i64>
// CHECK: %[[LHS:.+]] = load double
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> undef, double %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x double> %[[LHS_SPLAT_INSERT]], <4 x double> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x double> %[[LHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load i64
// CHECK: %[[RHS_CONV:.+]] = sitofp i64 %[[RHS]] to double
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> undef, double %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x double> %[[RHS_SPLAT_INSERT]], <4 x double> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x double> %[[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS_SPLAT]], <4 x double> %[[RHS_SPLAT]]
four_ints ? some_int : some_short;
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load i32
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load i16
// CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_SEXT]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]]
}
@ -137,8 +137,8 @@ void OneScalarOp() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load <4 x i32>
// CHECK: %[[RHS:.+]] = load i32
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> %[[RHS_SPLAT]]
@ -152,8 +152,8 @@ void OneScalarOp() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[RHS:.+]] = load float
// CHECK: %[[RHS_CONV:.+]] = fptosi float %[[RHS]] to i32
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[COND]], <4 x i32> %[[RHS_SPLAT]]
@ -167,8 +167,8 @@ void OneScalarOp() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load float
// CHECK: %[[LHS_CONV:.+]] = fptosi float %[[LHS]] to i32
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS_CONV]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_CONV]], i32 0
// CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: %[[RHS:.+]] = load <4 x i32>
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS]]
@ -177,8 +177,8 @@ void OneScalarOp() {
// CHECK: %[[COND:.+]] = load <4 x i32>
// CHECK: %[[LHS:.+]] = load <4 x float>
// CHECK: %[[RHS:.+]] = load float
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS]], <4 x float> %[[RHS_SPLAT]]
@ -205,8 +205,8 @@ void OneScalarOp() {
// CHECK: %[[LHS:.+]] = load <4 x i64>
// CHECK: %[[RHS:.+]] = load i32
// CHECK: %[[RHS_CONV:.+]] = sext i32 %[[RHS]] to i64
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]
@ -214,8 +214,8 @@ void OneScalarOp() {
// CHECK: %[[COND:.+]] = load <4 x i64>
// CHECK: %[[LHS:.+]] = load <4 x i64>
// CHECK: %[[RHS:.+]] = load i64
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]
@ -224,8 +224,8 @@ void OneScalarOp() {
// CHECK: %[[LHS:.+]] = load <4 x i64>
// CHECK: %[[RHS:.+]] = load double
// CHECK: %[[RHS_CONV:.+]] = fptosi double %[[RHS]] to i64
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
// CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0
// CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
// CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
// CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]
}

View File

@ -56,7 +56,7 @@ gcc_int_2 FloatToIntConversion(gcc_int_2 Int2, float f) {
// CHECK: %[[LOAD_INT:.+]] = load <2 x i32>
// CHECK: %[[LOAD:.+]] = load float, float*
// CHECK: %[[CONV:.+]] = fptosi float %[[LOAD]] to i32
// CHECK: %[[INSERT:.+]] = insertelement <2 x i32> undef, i32 %[[CONV]], i32 0
// CHECK: %[[SPLAT:.+]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
// CHECK: %[[INSERT:.+]] = insertelement <2 x i32> poison, i32 %[[CONV]], i32 0
// CHECK: %[[SPLAT:.+]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK: add <2 x i32> %[[LOAD_INT]], %[[SPLAT]]
}

View File

@ -11,8 +11,8 @@ void kernel ker() {
// CHECK: {{%.*}} = load i8, i8* %t, align 1
// CHECK: {{%.*}} = trunc i8 {{%.*}} to i1
// CHECK: {{%.*}} = sext i1 {{%.*}} to i32
// CHECK: {{%.*}} = insertelement <4 x i32> undef, i32 {{%.*}}, i32 0
// CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
// CHECK: {{%.*}} = insertelement <4 x i32> poison, i32 {{%.*}}, i32 0
// CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* %vec4, align 16
int i = (int)t;
// CHECK: {{%.*}} = load i8, i8* %t, align 1

View File

@ -60,8 +60,8 @@ int4 vectorVectorTest(int4 a,int4 b) {
//NOOPT-LABEL: @vectorScalarTest
int4 vectorScalarTest(int4 a,int b) {
//NOOPT: [[SP0:%.+]] = insertelement <4 x i32> undef
//NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> undef, <4 x i32> zeroinitializer
//NOOPT: [[SP0:%.+]] = insertelement <4 x i32> poison
//NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> poison, <4 x i32> zeroinitializer
//NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], <i32 31, i32 31, i32 31, i32 31>
//NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]]
int4 c = a << b;

View File

@ -2524,10 +2524,10 @@ public:
}
/// Create a unary shuffle. The second vector operand of the IR instruction
/// is undefined.
/// is poison.
Value *CreateShuffleVector(Value *V, ArrayRef<int> Mask,
const Twine &Name = "") {
return CreateShuffleVector(V, UndefValue::get(V->getType()), Mask, Name);
return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
}
Value *CreateExtractValue(Value *Agg,

View File

@ -1009,15 +1009,16 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
const Twine &Name) {
assert(EC.isNonZero() && "Cannot splat to an empty vector!");
// First insert it into an undef vector so we can shuffle it.
// First insert it into a poison vector so we can shuffle it.
Type *I32Ty = getInt32Ty();
Value *Undef = UndefValue::get(VectorType::get(V->getType(), EC));
V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0),
Value *Poison = PoisonValue::get(VectorType::get(V->getType(), EC));
V = CreateInsertElement(Poison, V, ConstantInt::get(I32Ty, 0),
Name + ".splatinsert");
// Shuffle the value across the desired number of elements.
Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, EC));
return CreateShuffleVector(V, Undef, Zeros, Name + ".splat");
SmallVector<int, 16> Zeros;
Zeros.resize(EC.getKnownMinValue());
return CreateShuffleVector(V, Zeros, Name + ".splat");
}
Value *IRBuilderBase::CreateExtractInteger(

View File

@ -2790,8 +2790,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrai_w_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
@ -2803,8 +2803,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrai_d_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
@ -2817,8 +2817,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrai_q_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
@ -2830,8 +2830,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrli_d_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
@ -2844,8 +2844,8 @@ define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrli_q_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
@ -2858,8 +2858,8 @@ define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrli_w_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
@ -2872,8 +2872,8 @@ define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
; CHECK-LABEL: @sse2_pslli_q_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
@ -2886,8 +2886,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx2_pslli_w_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
@ -2899,8 +2899,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx512_pslli_d_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <16 x i32> [[TMP2]]
;

View File

@ -2790,8 +2790,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrai_w_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
@ -2803,8 +2803,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrai_d_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
@ -2817,8 +2817,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrai_q_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
@ -2830,8 +2830,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrli_d_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
@ -2844,8 +2844,8 @@ define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrli_q_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
@ -2858,8 +2858,8 @@ define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrli_w_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
@ -2872,8 +2872,8 @@ define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
; CHECK-LABEL: @sse2_pslli_q_128_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
@ -2886,8 +2886,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx2_pslli_w_256_masked(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
@ -2899,8 +2899,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx512_pslli_d_512_masked(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT: ret <16 x i32> [[TMP2]]
;

View File

@ -90,7 +90,7 @@ entry:
define <2 x i1> @test_vector_index(i8* %base, <2 x i64> %idx) {
; CHECK-LABEL: @test_vector_index(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8*> undef, i8* [[BASE:%.*]], i32 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8*> poison, i8* [[BASE:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i8*> [[DOTSPLATINSERT]], zeroinitializer
; CHECK-NEXT: [[CND:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CND]]

View File

@ -215,7 +215,7 @@ define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind {
define <2 x i1> @test13_vector2(i64 %X, <2 x %S*> %P) nounwind {
; CHECK-LABEL: @test13_vector2(
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 2, i64 0>
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], <i64 -4, i64 poison>
; CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> undef, <2 x i32> zeroinitializer
@ -230,7 +230,7 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x %S*> %P) nounwind {
; This is a test of icmp + shl nuw in disguise - 4611... is 0x3fff...
define <2 x i1> @test13_vector3(i64 %X, <2 x %S*> %P) nounwind {
; CHECK-LABEL: @test13_vector3(
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 2, i64 0>
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], <i64 4, i64 poison>
; CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> undef, <2 x i32> zeroinitializer

View File

@ -5,8 +5,8 @@
define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
@ -20,8 +20,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %
define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4
define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select_undefs(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]]
; CHECK-NEXT: ret <3 x float> [[R]]
;

View File

@ -5,8 +5,8 @@
define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
@ -20,8 +20,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %
define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4
define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @narrow_shuffle_of_select_undefs(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]]
; CHECK-NEXT: ret <3 x float> [[R]]
;

View File

@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) {
; Extracting an i32 that isn't aligned to any natural boundary.
define i32 @type_pun_misaligned(<16 x i8> %in) {
; CHECK-LABEL: @type_pun_misaligned(
; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32>
; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]]

View File

@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) {
; Extracting an i32 that isn't aligned to any natural boundary.
define i32 @type_pun_misaligned(<16 x i8> %in) {
; CHECK-LABEL: @type_pun_misaligned(
; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32>
; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]]

View File

@ -1473,7 +1473,7 @@ define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x
define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 317426, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1486,7 +1486,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: @vsplat_assoc_add(
; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
@ -1502,7 +1502,7 @@ define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x
define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elts(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1517,7 +1517,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1597,7 +1597,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e
define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @splat_assoc_fmul(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float poison, float 3.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x float> [[R]]
;
@ -1612,7 +1612,7 @@ define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
@ -1626,7 +1626,7 @@ define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt1(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
@ -1655,7 +1655,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %
define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;

View File

@ -1472,7 +1472,7 @@ define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x
define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 317426, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1485,7 +1485,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: @vsplat_assoc_add(
; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
@ -1501,7 +1501,7 @@ define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x
define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elts(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1516,7 +1516,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@ -1596,7 +1596,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e
define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @splat_assoc_fmul(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float poison, float 3.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x float> [[R]]
;
@ -1611,7 +1611,7 @@ define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
@ -1625,7 +1625,7 @@ define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt1(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
@ -1654,7 +1654,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %
define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1(
; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;

View File

@ -12,10 +12,10 @@ define <vscale x 2 x i1> @sge(<vscale x 2 x i8> %x) {
define <vscale x 2 x i1> @gep_scalevector1(i32* %X) nounwind {
; CHECK-LABEL: @gep_scalevector1(
; CHECK-NEXT: [[S:%.*]] = insertelement <vscale x 2 x i32*> undef, i32* [[X:%.*]], i32 0
; CHECK-NEXT: [[C:%.*]] = icmp eq <vscale x 2 x i32*> [[S]], zeroinitializer
; CHECK-NEXT: [[C1:%.*]] = shufflevector <vscale x 2 x i1> [[C]], <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 2 x i1> [[C1]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32*> poison, i32* [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 2 x i32*> [[DOTSPLATINSERT]], zeroinitializer
; CHECK-NEXT: [[C:%.*]] = shufflevector <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 2 x i1> [[C]]
;
%A = getelementptr inbounds i32, i32* %X, <vscale x 2 x i64> zeroinitializer
%C = icmp eq <vscale x 2 x i32*> %A, zeroinitializer

View File

@ -104,10 +104,10 @@ for.end: ; preds = %for.body
; CHECK-LABEL: @ptr_ind_plus2(
; CHECK: %[[V0:.*]] = load <8 x i32>
; CHECK: %[[V1:.*]] = load <8 x i32>
; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add <4 x i32>
@ -117,8 +117,8 @@ for.end: ; preds = %for.body
; FORCE-VEC-LABEL: @ptr_ind_plus2(
; FORCE-VEC: %[[V:.*]] = load <4 x i32>
; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2

View File

@ -18,8 +18,8 @@
; CHECK-LABEL: @foo_i32(
; CHECK-LABEL: vector.ph:
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
@ -85,8 +85,8 @@ for.end10: ; preds = %for.inc8
; CHECK-LABEL: @foo_i64(
; CHECK-LABEL: vector.ph:
; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> undef, i64 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> poison, i64 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]

View File

@ -12,8 +12,8 @@ define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst,
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -101,7 +101,7 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[REVERSE]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0
@ -170,7 +170,7 @@ define void @test_stride2_4i32(i32* readonly %data, i32* noalias nocapture %dst,
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
@ -369,8 +369,8 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]])
@ -554,13 +554,13 @@ define void @test_stride_noninvar3_4i32(i32* readonly %data, i32* noalias nocapt
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[N_VEC]], [[X:%.*]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[X]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

View File

@ -19,8 +19,8 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -107,8 +107,8 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]])
@ -195,8 +195,8 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -266,8 +266,8 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -337,8 +337,8 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -408,8 +408,8 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -479,8 +479,8 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -550,8 +550,8 @@ define float @fadd_f32(float* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
@ -621,8 +621,8 @@ define float @fmul_f32(float* nocapture readonly %x, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])

View File

@ -7,8 +7,8 @@ target triple = "thumbv8.1m.main-none-none-eabi"
define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %s, i32%y) {
; CHECK-LABEL: @pointer_phi_v4i32_add1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -48,8 +48,8 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 1992
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -58,7 +58,7 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
@ -103,8 +103,8 @@ define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 2988
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
@ -156,8 +156,8 @@ define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i
; CHECK-LABEL: @pointer_phi_v8i16_add1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -199,8 +199,8 @@ define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1984
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 992
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -209,7 +209,7 @@ define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, <16 x i16>* [[TMP2]], align 2
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
@ -295,8 +295,8 @@ define hidden void @pointer_phi_v16i8_add1(i8* noalias nocapture readonly %A, i8
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 992
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -351,8 +351,8 @@ define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 1984
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -361,7 +361,7 @@ define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP2]], align 1
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* [[TMP4]], align 1
@ -446,8 +446,8 @@ end:
define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
; CHECK-LABEL: @pointer_phi_v4f32_add1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -487,8 +487,8 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A,
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 1992
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr float, float* [[B:%.*]], i32 996
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -497,7 +497,7 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A,
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[NEXT_GEP]] to <8 x float>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
@ -542,8 +542,8 @@ define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A,
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 2988
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr float, float* [[B:%.*]], i32 996
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
@ -594,8 +594,8 @@ end:
define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) {
; CHECK-LABEL: @pointer_phi_v4half_add1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -635,8 +635,8 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A,
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 1984
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr half, half* [[B:%.*]], i32 992
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -645,7 +645,7 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A,
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <16 x half>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x half>, <16 x half>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>*
; CHECK-NEXT: store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4
@ -730,10 +730,10 @@ define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i3
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59952
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9992
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
@ -798,14 +798,14 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59904
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9984
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]

View File

@ -300,7 +300,7 @@ define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 -3
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP7]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0

View File

@ -13,11 +13,11 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: [[BB3:%.*]] = bitcast float* [[BB:%.*]] to i8*
; VF-TWO-CHECK-NEXT: [[CC6:%.*]] = bitcast float* [[CC:%.*]] to i8*
; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
; VF-TWO-CHECK: iter.check:
; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; VF-TWO-CHECK: vector.memcheck:
; VF-TWO-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[AA]], i64 [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
@ -33,7 +33,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; VF-TWO-CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; VF-TWO-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vector.main.loop.iter.check:
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK12:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK12]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
@ -69,40 +69,40 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]]
; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28
; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32
; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36
; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40
; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44
; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4, !alias.scope !0
; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]]
; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]]
@ -117,40 +117,40 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]]
; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28
; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32
; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36
; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40
; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44
; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4, !alias.scope !3
; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD24]]
; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD13]], [[WIDE_LOAD25]]
; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD14]], [[WIDE_LOAD26]]
@ -177,57 +177,57 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]]
; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32
; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36
; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40
; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44
; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4, !alias.scope !5, !noalias !7
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 48
; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV:!.*]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; VF-TWO-CHECK: middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vec.epilog.iter.check:
; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-TWO-CHECK: vec.epilog.ph:
; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: [[N_MOD_VF36:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: [[N_VEC37:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF36]]
; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_BODY:%.*]]
; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; VF-TWO-CHECK: vec.epilog.vector.body:
; VF-TWO-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_BODY]] ]
; VF-TWO-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX38]], 0
; VF-TWO-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]]
; VF-TWO-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0
@ -244,15 +244,15 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP140]], <2 x float>* [[TMP143]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT39]] = add i64 [[INDEX38]], 2
; VF-TWO-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC37]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_BODY]], !llvm.loop [[LOOPID_EV:!.*]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; VF-TWO-CHECK: vec.epilog.middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC37]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; VF-TWO-CHECK: scalar.ph:
; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-TWO-CHECK: vec.epilog.scalar.ph:
; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-TWO-CHECK: for.body:
; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]]
; VF-TWO-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4
; VF-TWO-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]]
@ -262,7 +262,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4
; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS:!.*]]
; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP12:!llvm.loop !.*]]
; VF-TWO-CHECK: for.end.loopexit.loopexit:
; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; VF-TWO-CHECK: for.end.loopexit:
@ -270,10 +270,269 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; VF-TWO-CHECK: for.end:
; VF-TWO-CHECK-NEXT: ret void
;
; VF-TWO-CHECK-DAG: [[LOOPID_MV]] = distinct !{[[LOOPID_MV]], [[LOOPID_DISABLE_VECT:!.*]]}
; VF-TWO-CHECK-DAG: [[LOOPID_EV]] = distinct !{[[LOOPID_EV]], [[LOOPID_DISABLE_UNROLL:!.*]], [[LOOPID_DISABLE_VECT:!.*]]}
; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_VECT]] = [[DISABLE_VECT_STR:!{!"llvm.loop.isvectorized".*}.*]]
; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_UNROLL]] = [[DISABLE_UNROLL_STR:!{!"llvm.loop.unroll.runtime.disable"}.*]]
; VF-FOUR-CHECK-LABEL: @f1(
; VF-FOUR-CHECK-NEXT: entry:
; VF-FOUR-CHECK-NEXT: [[AA1:%.*]] = bitcast float* [[AA:%.*]] to i8*
; VF-FOUR-CHECK-NEXT: [[BB3:%.*]] = bitcast float* [[BB:%.*]] to i8*
; VF-FOUR-CHECK-NEXT: [[CC6:%.*]] = bitcast float* [[CC:%.*]] to i8*
; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
; VF-FOUR-CHECK: iter.check:
; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; VF-FOUR-CHECK: vector.memcheck:
; VF-FOUR-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[AA]], i64 [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
; VF-FOUR-CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[BB]], i64 [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
; VF-FOUR-CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr float, float* [[CC]], i64 [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
; VF-FOUR-CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[AA1]], [[SCEVGEP45]]
; VF-FOUR-CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[BB3]], [[SCEVGEP2]]
; VF-FOUR-CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; VF-FOUR-CHECK-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[AA1]], [[SCEVGEP78]]
; VF-FOUR-CHECK-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[CC6]], [[SCEVGEP2]]
; VF-FOUR-CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
; VF-FOUR-CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
; VF-FOUR-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; VF-FOUR-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vector.main.loop.iter.check:
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK12:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK12]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF-FOUR-CHECK: vector.ph:
; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 48
; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; VF-FOUR-CHECK: vector.body:
; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 16
; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 20
; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 24
; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 28
; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 32
; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 36
; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 40
; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 44
; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP1]]
; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP2]]
; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP3]]
; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP4]]
; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP5]]
; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP6]]
; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP7]]
; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP9]]
; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP10]]
; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]]
; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28
; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32
; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36
; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40
; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44
; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4, !alias.scope !0
; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]]
; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]]
; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP3]]
; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP4]]
; VF-FOUR-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP5]]
; VF-FOUR-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP6]]
; VF-FOUR-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP7]]
; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP9]]
; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP10]]
; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]]
; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28
; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32
; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36
; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40
; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44
; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4, !alias.scope !3
; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD24]]
; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD13]], [[WIDE_LOAD25]]
; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD14]], [[WIDE_LOAD26]]
; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[WIDE_LOAD15]], [[WIDE_LOAD27]]
; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[WIDE_LOAD16]], [[WIDE_LOAD28]]
; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[WIDE_LOAD17]], [[WIDE_LOAD29]]
; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[WIDE_LOAD18]], [[WIDE_LOAD30]]
; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[WIDE_LOAD19]], [[WIDE_LOAD31]]
; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[WIDE_LOAD20]], [[WIDE_LOAD32]]
; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[WIDE_LOAD21]], [[WIDE_LOAD33]]
; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[WIDE_LOAD22]], [[WIDE_LOAD34]]
; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[WIDE_LOAD23]], [[WIDE_LOAD35]]
; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP1]]
; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP2]]
; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP3]]
; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP4]]
; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP5]]
; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP6]]
; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP7]]
; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP9]]
; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP10]]
; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]]
; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32
; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36
; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40
; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44
; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4, !alias.scope !5, !noalias !7
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 48
; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; VF-FOUR-CHECK: middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vec.epilog.iter.check:
; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-FOUR-CHECK: vec.epilog.ph:
; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: [[N_MOD_VF36:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; VF-FOUR-CHECK-NEXT: [[N_VEC37:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF36]]
; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; VF-FOUR-CHECK: vec.epilog.vector.body:
; VF-FOUR-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX38]], 0
; VF-FOUR-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]]
; VF-FOUR-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP136:%.*]] = bitcast float* [[TMP135]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD41:%.*]] = load <4 x float>, <4 x float>* [[TMP136]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP133]]
; VF-FOUR-CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds float, float* [[TMP137]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP139:%.*]] = bitcast float* [[TMP138]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD42:%.*]] = load <4 x float>, <4 x float>* [[TMP139]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP140:%.*]] = fadd fast <4 x float> [[WIDE_LOAD41]], [[WIDE_LOAD42]]
; VF-FOUR-CHECK-NEXT: [[TMP141:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP133]]
; VF-FOUR-CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, float* [[TMP141]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP143:%.*]] = bitcast float* [[TMP142]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP140]], <4 x float>* [[TMP143]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT39]] = add i64 [[INDEX38]], 4
; VF-FOUR-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC37]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; VF-FOUR-CHECK: vec.epilog.middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC37]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-FOUR-CHECK: vec.epilog.scalar.ph:
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-FOUR-CHECK: for.body:
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: [[TMP146:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; VF-FOUR-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP145]], [[TMP146]]
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP12:!llvm.loop !.*]]
; VF-FOUR-CHECK: for.end.loopexit.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; VF-FOUR-CHECK: for.end.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
; VF-FOUR-CHECK: for.end:
; VF-FOUR-CHECK-NEXT: ret void
;
entry:
@ -305,6 +564,254 @@ for.end: ; preds = %for.end.loopexit, %
}
define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signext %n) #0 {
; VF-TWO-CHECK-LABEL: @f2(
; VF-TWO-CHECK-NEXT: entry:
; VF-TWO-CHECK-NEXT: [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
; VF-TWO-CHECK: iter.check:
; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; VF-TWO-CHECK: vector.scevcheck:
; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; VF-TWO-CHECK: vector.memcheck:
; VF-TWO-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP0]] to i64
; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 1
; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP13]], [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP14]]
; VF-TWO-CHECK-NEXT: [[SCEVGEP34:%.*]] = bitcast float* [[SCEVGEP3]] to i8*
; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP12]], 1
; VF-TWO-CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr float, float* [[B]], i64 [[TMP15]]
; VF-TWO-CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast float* [[SCEVGEP5]] to i8*
; VF-TWO-CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP56]]
; VF-TWO-CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; VF-TWO-CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; VF-TWO-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vector.main.loop.iter.check:
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF-TWO-CHECK: vector.ph:
; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; VF-TWO-CHECK: vector.body:
; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 4
; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 8
; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 12
; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 16
; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 20
; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 24
; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 28
; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 0
; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 4
; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 8
; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 12
; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], 16
; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 20
; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 24
; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = add i32 [[OFFSET_IDX]], 28
; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1
; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1
; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1
; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1
; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = xor i32 [[TMP28]], -1
; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = xor i32 [[TMP29]], -1
; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = xor i32 [[TMP30]], -1
; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP31]], -1
; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP36]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP37]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP39]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64
; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64
; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64
; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64
; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = sext i32 [[TMP44]] to i64
; VF-TWO-CHECK-NEXT: [[TMP53:%.*]] = sext i32 [[TMP45]] to i64
; VF-TWO-CHECK-NEXT: [[TMP54:%.*]] = sext i32 [[TMP46]] to i64
; VF-TWO-CHECK-NEXT: [[TMP55:%.*]] = sext i32 [[TMP47]] to i64
; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]]
; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]]
; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]]
; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]]
; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP52]]
; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP53]]
; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP54]]
; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP55]]
; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP64]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = bitcast float* [[TMP65]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -4
; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP67]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -8
; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP70]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = bitcast float* [[TMP71]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -12
; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP73]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -16
; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP76]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = bitcast float* [[TMP77]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -20
; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP79]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -24
; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds float, float* [[TMP82]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = bitcast float* [[TMP83]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -28
; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds float, float* [[TMP85]], i32 -3
; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = bitcast float* [[TMP86]] to <4 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4, !alias.scope !13
; VF-TWO-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE13]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[REVERSE15]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[REVERSE17]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[REVERSE19]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[REVERSE21]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]]
; VF-TWO-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]]
; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]]
; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]]
; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP22]]
; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]]
; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4, !alias.scope !16, !noalias !13
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]]
; VF-TWO-CHECK: middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vec.epilog.iter.check:
; VF-TWO-CHECK-NEXT: [[IND_END27:%.*]] = trunc i64 [[N_VEC]] to i32
; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-TWO-CHECK: vec.epilog.ph:
; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: [[N_MOD_VF22:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: [[N_VEC23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF22]]
; VF-TWO-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC23]] to i32
; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; VF-TWO-CHECK: vec.epilog.vector.body:
; VF-TWO-CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = add i64 [[INDEX24]], 0
; VF-TWO-CHECK-NEXT: [[OFFSET_IDX29:%.*]] = trunc i64 [[INDEX24]] to i32
; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = add i32 [[OFFSET_IDX29]], 0
; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = xor i32 [[TMP122]], -1
; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], [[N]]
; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = sext i32 [[TMP124]] to i64
; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP125]]
; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[TMP126]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 -1
; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <2 x float>*
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x float>, <2 x float>* [[TMP129]], align 4
; VF-TWO-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <2 x float> [[WIDE_LOAD30]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = fadd fast <2 x float> [[REVERSE31]], <float 1.000000e+00, float 1.000000e+00>
; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP121]]
; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, float* [[TMP131]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP133:%.*]] = bitcast float* [[TMP132]] to <2 x float>*
; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP130]], <2 x float>* [[TMP133]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 2
; VF-TWO-CHECK-NEXT: [[TMP134:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP19:!llvm.loop !.*]]
; VF-TWO-CHECK: vec.epilog.middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC23]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N28]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-TWO-CHECK: vec.epilog.scalar.ph:
; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-TWO-CHECK: for.body:
; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL26]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP135:%.*]] = xor i32 [[I_014]], -1
; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP135]], [[N]]
; VF-TWO-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]]
; VF-TWO-CHECK-NEXT: [[TMP136:%.*]] = load float, float* [[ARRAYIDX]], align 4
; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP136]], 1.000000e+00
; VF-TWO-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; VF-TWO-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4
; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP20:!llvm.loop !.*]]
; VF-TWO-CHECK: for.end.loopexit.loopexit:
; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; VF-TWO-CHECK: for.end.loopexit:
; VF-TWO-CHECK-NEXT: br label [[FOR_END]]
; VF-TWO-CHECK: for.end:
; VF-TWO-CHECK-NEXT: ret i32 0
;
; VF-FOUR-CHECK-LABEL: @f2(
; VF-FOUR-CHECK-NEXT: entry:
; VF-FOUR-CHECK-NEXT: [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
@ -330,7 +837,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEM_CHECK:%.*]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; VF-FOUR-CHECK: vector.memcheck:
; VF-FOUR-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
@ -408,43 +915,43 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP64]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = bitcast float* [[TMP65]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -4
; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP67]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -8
; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP70]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = bitcast float* [[TMP71]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -12
; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP73]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -16
; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP76]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = bitcast float* [[TMP77]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -20
; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP79]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -24
; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds float, float* [[TMP82]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = bitcast float* [[TMP83]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -28
; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds float, float* [[TMP85]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = bitcast float* [[TMP86]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4, !alias.scope !13
; VF-FOUR-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@ -463,31 +970,31 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]]
; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4, !alias.scope !16, !noalias !13
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV_CM:!.*]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]]
; VF-FOUR-CHECK: middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@ -515,7 +1022,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 -3
; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP129]], align 4
; VF-FOUR-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x float> [[WIDE_LOAD30]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x float> [[WIDE_LOAD30]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = fadd fast <4 x float> [[REVERSE31]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP121]]
; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, float* [[TMP131]], i32 0
@ -523,13 +1030,13 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP130]], <4 x float>* [[TMP133]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 4
; VF-FOUR-CHECK-NEXT: [[TMP134:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV_CM:!.*]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP19:!llvm.loop !.*]]
; VF-FOUR-CHECK: vec.epilog.middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC23]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N28]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-FOUR-CHECK: vec.epilog.scalar.ph:
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEM_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEM_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-FOUR-CHECK: for.body:
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@ -545,18 +1052,15 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:!.*]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP20:!llvm.loop !.*]]
; VF-FOUR-CHECK: for.end.loopexit.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; VF-FOUR-CHECK: for.end.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
; VF-FOUR-CHECK: for.end:
; VF-FOUR-CHECK-NEXT: ret i32 0
;
; VF-FOUR-CHECK-DAG: [[LOOPID_MV_CM]] = distinct !{[[LOOPID_MV_CM]], [[LOOPID_DISABLE_VECT_CM:!.*]]}
; VF-FOUR-CHECK-DAG: [[LOOPID_EV_CM]] = distinct !{[[LOOPID_EV_CM]], [[LOOPID_DISABLE_UNROLL_CM:!.*]], [[LOOPID_DISABLE_VECT_CM:!.*]]}
; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_VECT_CM]] = [[DISABLE_VECT_STR_CM:!{!"llvm.loop.isvectorized".*}.*]]
; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_UNROLL_CM]] = [[DISABLE_UNROLL_STR_CM:!{!"llvm.loop.unroll.runtime.disable"}.*]]
entry:
%cmp1 = icmp sgt i32 %n, 1

View File

@ -20,8 +20,8 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> undef, float %x, i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label %vector.body
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@ -30,13 +30,13 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <80 x float>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <80 x float>, <80 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> poison, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <16 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> [[VEC_IND]]
; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x float*> [[TMP3]] to <16 x <80 x float>*>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x <80 x float>*> [[BC]], i32 0
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> poison, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16

View File

@ -19,8 +19,8 @@ define void @f1() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1>
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64
@ -30,7 +30,7 @@ define void @f1() {
; CHECK-NEXT: store <2 x i16*> <i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0), i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2, 2
; CHECK-NEXT: br i1 [[CMP_N]], label [[BB3:%.*]], label [[SCALAR_PH]]
@ -44,10 +44,10 @@ define void @f1() {
; CHECK-NEXT: [[_TMP4:%.*]] = bitcast %rec8* [[_TMP2]] to i16*
; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64
; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[_TMP6]]
; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]]
; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]], align 8
; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1
; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2
; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop !2
; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]]
; CHECK: bb3:
; CHECK-NEXT: ret void
;

View File

@ -18,10 +18,10 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
; CHECK: if.then:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> undef, i8 [[X]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT2]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT2]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -36,11 +36,11 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], <i32 24, i32 24, i32 24, i32 24>
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], <i32 24, i32 24, i32 24, i32 24>
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> undef, i8 [[TMP8]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> undef, i8 [[TMP9]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], <i32 16, i32 16, i32 16, i32 16>

View File

@ -37,8 +37,8 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b)
; AVX-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; AVX-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <8 x i32>*
; AVX-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
; AVX-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; AVX-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; AVX-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; AVX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*

View File

@ -28,10 +28,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -57,10 +57,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX9:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1
; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX9]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]

View File

@ -126,12 +126,12 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -157,12 +157,12 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX10:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1
; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX10]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@ -260,12 +260,12 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK16]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -294,12 +294,12 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX21:%.*]] = select i1 [[TMP9]], i64 [[N]], i64 1
; CHECK-NEXT: [[N_VEC23:%.*]] = and i64 [[SMAX21]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]

View File

@ -22,14 +22,14 @@ define i32 @test_explicit_pred(i64 %len) {
; CHECK-NEXT: call void @init(i32* [[BASE]])
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[LEN:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

View File

@ -1683,22 +1683,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3
; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41
; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4
; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3
; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41
; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD12]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD12]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3
; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
; AVX2-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41
; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD14]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD14]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3
; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
; AVX2-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41
; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD16]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD16]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE13]], zeroinitializer
; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE15]], zeroinitializer
@ -1709,28 +1709,28 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3
; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE18]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX2-NEXT: [[TMP42:%.*]] = fadd <4 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@ -1739,22 +1739,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX2-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX2-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE29]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE18]]), !alias.scope !46, !noalias !48
; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
; AVX2-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE31]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE20]]), !alias.scope !46, !noalias !48
; AVX2-NEXT: [[REVERSE33:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE33:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX2-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE33]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
; AVX2-NEXT: [[REVERSE35:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE35:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
@ -1827,22 +1827,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7
; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !55
; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !55
; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD12]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD12]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16
; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7
; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>*
; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !55
; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD14]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD14]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7
; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>*
; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !55
; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD16]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD16]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE13]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <8 x i32> [[REVERSE15]], zeroinitializer
@ -1853,28 +1853,28 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7
; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE18]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7
; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !58
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; AVX512-NEXT: [[TMP42:%.*]] = fadd <8 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@ -1883,22 +1883,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s
; AVX512-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -7
; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE29]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE18]]), !alias.scope !60, !noalias !62
; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX512-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -7
; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE31]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE20]]), !alias.scope !60, !noalias !62
; AVX512-NEXT: [[REVERSE33:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE33:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -16
; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -7
; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE33]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !60, !noalias !62
; AVX512-NEXT: [[REVERSE35:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE35:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -24
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -7
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*

View File

@ -20,8 +20,8 @@ target triple = "x86_64-unknown-linux-gnu"
define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
; O1-LABEL: @enabled(
; O1-NEXT: entry:
; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O1-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -137,8 +137,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; O2-LABEL: @enabled(
; O2-NEXT: entry:
; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -254,8 +254,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; O3-LABEL: @enabled(
; O3-NEXT: entry:
; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -371,8 +371,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; O3DEFAULT-LABEL: @enabled(
; O3DEFAULT-NEXT: entry:
; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -488,8 +488,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; Os-LABEL: @enabled(
; Os-NEXT: entry:
; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -605,8 +605,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; Oz-LABEL: @enabled(
; Oz-NEXT: entry:
; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; Oz-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -722,8 +722,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; O1VEC2-LABEL: @enabled(
; O1VEC2-NEXT: entry:
; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O1VEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -839,8 +839,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; OzVEC2-LABEL: @enabled(
; OzVEC2-NEXT: entry:
; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; OzVEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -956,8 +956,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %
;
; O3DIS-LABEL: @enabled(
; O3DIS-NEXT: entry:
; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O3DIS-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -1110,8 +1110,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
;
; O2-LABEL: @nopragma(
; O2-NEXT: entry:
; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -1227,8 +1227,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
;
; O3-LABEL: @nopragma(
; O3-NEXT: entry:
; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -1344,8 +1344,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
;
; O3DEFAULT-LABEL: @nopragma(
; O3DEFAULT-NEXT: entry:
; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -1461,8 +1461,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
;
; Os-LABEL: @nopragma(
; Os-NEXT: entry:
; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@ -1597,8 +1597,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O1VEC2-NEXT: entry:
; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; O1VEC2: vector.ph:
; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]]
; O1VEC2: vector.body:
; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -1614,7 +1614,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O1VEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4
; O1VEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; O1VEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; O1VEC2: middle.block:
; O1VEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64
; O1VEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@ -1630,7 +1630,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; O1VEC2: for.end:
; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4
; O1VEC2-NEXT: ret i32 [[TMP10]]
@ -1639,8 +1639,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; OzVEC2-NEXT: entry:
; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; OzVEC2: vector.ph:
; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]]
; OzVEC2: vector.body:
; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -1656,7 +1656,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; OzVEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4
; OzVEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; OzVEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; OzVEC2: middle.block:
; OzVEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64
; OzVEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@ -1672,7 +1672,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly
; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; OzVEC2: for.end:
; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4
; OzVEC2-NEXT: ret i32 [[TMP10]]
@ -1726,7 +1726,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; O1: for.end:
; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; O1-NEXT: ret i32 [[TMP1]]
@ -1743,7 +1743,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; O2: for.end:
; O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; O2-NEXT: ret i32 [[TMP1]]
@ -1760,7 +1760,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; O3: for.end:
; O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; O3-NEXT: ret i32 [[TMP1]]
@ -1866,7 +1866,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; Os-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; Os: for.end:
; Os-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; Os-NEXT: ret i32 [[TMP1]]
@ -1883,7 +1883,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; Oz: for.end:
; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; Oz-NEXT: ret i32 [[TMP1]]
@ -1900,7 +1900,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4
; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; O1VEC2: for.end:
; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; O1VEC2-NEXT: ret i32 [[TMP1]]
@ -1917,7 +1917,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4
; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; OzVEC2: for.end:
; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; OzVEC2-NEXT: ret i32 [[TMP1]]
@ -1934,7 +1934,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly
; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0
; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; O3DIS: for.end:
; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
; O3DIS-NEXT: ret i32 [[TMP1]]

View File

@ -17,8 +17,8 @@ define i32 @foo_optsize() #0 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@ -32,7 +32,7 @@ define i32 @foo_optsize() #0 {
; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -47,7 +47,7 @@ define i32 @foo_optsize() #0 {
; CHECK-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
@ -58,8 +58,8 @@ define i32 @foo_optsize() #0 {
; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]]
; AUTOVF: vector.body:
; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> undef, i32 [[INDEX]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> undef, <32 x i32> zeroinitializer
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@ -73,7 +73,7 @@ define i32 @foo_optsize() #0 {
; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]])
; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32
; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; AUTOVF: middle.block:
; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AUTOVF: scalar.ph:
@ -88,7 +88,7 @@ define i32 @foo_optsize() #0 {
; AUTOVF-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1
; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; AUTOVF: for.end:
; AUTOVF-NEXT: ret i32 0
;
@ -121,8 +121,8 @@ define i32 @foo_minsize() #1 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@ -136,7 +136,7 @@ define i32 @foo_minsize() #1 {
; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -151,7 +151,7 @@ define i32 @foo_minsize() #1 {
; CHECK-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
@ -162,8 +162,8 @@ define i32 @foo_minsize() #1 {
; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]]
; AUTOVF: vector.body:
; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> undef, i32 [[INDEX]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> undef, <32 x i32> zeroinitializer
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@ -177,7 +177,7 @@ define i32 @foo_minsize() #1 {
; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]])
; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32
; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; AUTOVF: middle.block:
; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AUTOVF: scalar.ph:
@ -192,7 +192,7 @@ define i32 @foo_minsize() #1 {
; AUTOVF-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1
; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5
; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; AUTOVF: for.end:
; AUTOVF-NEXT: ret i32 0
;
@ -224,14 +224,103 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read
; CHECK-NEXT: for.body.preheader:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <64 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 9
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 10
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 11
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 12
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 15
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[INDEX]], 17
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 18
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[INDEX]], 19
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 20
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[INDEX]], 21
; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 22
; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 23
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 24
; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 25
; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 26
; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[INDEX]], 27
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 28
; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 29
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[INDEX]], 30
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[INDEX]], 31
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 32
; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 33
; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[INDEX]], 34
; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 35
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 36
; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 37
; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[INDEX]], 38
; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[INDEX]], 39
; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 40
; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 41
; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[INDEX]], 42
; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[INDEX]], 43
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 44
; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[INDEX]], 45
; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[INDEX]], 46
; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[INDEX]], 47
; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 48
; CHECK-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 49
; CHECK-NEXT: [[TMP50:%.*]] = add i32 [[INDEX]], 50
; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[INDEX]], 51
; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 52
; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[INDEX]], 53
; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[INDEX]], 54
; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[INDEX]], 55
; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 56
; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[INDEX]], 57
; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 58
; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[INDEX]], 59
; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 60
; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 61
; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[INDEX]], 62
; CHECK-NEXT: [[TMP63:%.*]] = add i32 [[INDEX]], 63
; CHECK-NEXT: [[TMP64:%.*]] = mul nsw <64 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <64 x i32> [[TMP64]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0i32(<64 x i32*> [[TMP65]], i32 4, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i32> undef)
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[TMP66]], i32 0
; CHECK-NEXT: [[TMP68:%.*]] = bitcast i32* [[TMP67]] to <64 x i32>*
; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], <64 x i32>* [[TMP68]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
; CHECK-NEXT: [[TMP69:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP69]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[MUL]]
; CHECK-NEXT: [[TMP70:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_07]]
; CHECK-NEXT: store i32 [[TMP70]], i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: ret void
;
@ -239,14 +328,47 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read
; AUTOVF-NEXT: for.body.preheader:
; AUTOVF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AUTOVF: vector.ph:
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[K:%.*]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i32 0
; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]]
; AUTOVF: vector.body:
; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTOVF-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; AUTOVF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; AUTOVF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; AUTOVF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; AUTOVF-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4
; AUTOVF-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
; AUTOVF-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
; AUTOVF-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
; AUTOVF-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; AUTOVF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <8 x i32> [[TMP8]]
; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP9]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; AUTOVF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
; AUTOVF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
; AUTOVF-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <8 x i32>*
; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], <8 x i32>* [[TMP12]], align 4
; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AUTOVF-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; AUTOVF-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; AUTOVF: middle.block:
; AUTOVF-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256
; AUTOVF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AUTOVF: scalar.ph:
; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; AUTOVF-NEXT: br label [[FOR_BODY:%.*]]
; AUTOVF: for.body:
; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; AUTOVF-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]]
; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[MUL]]
; AUTOVF-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AUTOVF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_07]]
; AUTOVF-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX1]], align 4
; AUTOVF-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256
; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; AUTOVF: for.end.loopexit:
; AUTOVF-NEXT: ret void
;

View File

@ -19,8 +19,8 @@
;
; CHECK-LABEL: vector.ph:
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
@ -48,8 +48,8 @@
; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
; AVX-LABEL: vector.ph:
; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> undef, i32 %n, i32 0
; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i32 0
; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX-LABEL: vector.body:
; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]

View File

@ -17,8 +17,8 @@ define void @small_tc(float* noalias nocapture %A, float* noalias nocapture read
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]

View File

@ -82,8 +82,8 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
@ -142,8 +142,8 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK: vector.ph10:
; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4
; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[N_RND_UP11]], 8589934588
; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <4 x i64> undef, i64 [[TMP19]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
; CHECK: vector.body9:
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ]
@ -151,8 +151,8 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX14]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
@ -329,13 +329,13 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
@ -534,8 +534,8 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], <i64 257, i64 257, i64 257, i64 257>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0

View File

@ -13,8 +13,8 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
@ -84,8 +84,8 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
@ -167,14 +167,14 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]

View File

@ -74,17 +74,17 @@ define i32 @uniform_load2(i32* align(4) %addr) {
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ADDR:%.*]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]]
; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]]
@ -614,17 +614,17 @@ define i32 @uniform_load_global() {
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @GAddr, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @GAddr, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @GAddr, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @GAddr, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]]
; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]]
@ -689,17 +689,17 @@ define i32 @uniform_load_constexpr() {
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]]
; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]]

View File

@ -38,7 +38,7 @@ define void @vectorized(float* noalias nocapture %A, float* noalias nocapture re
; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !0
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !1
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP1:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 20, 16
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@ -55,7 +55,7 @@ define void @vectorized(float* noalias nocapture %A, float* noalias nocapture re
; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !0
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@ -94,8 +94,8 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], <i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19>
@ -112,7 +112,7 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP8]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group !6
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !7
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -128,7 +128,7 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@ -166,8 +166,8 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]
@ -183,7 +183,7 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !6
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, 16
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@ -200,7 +200,7 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r
; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !11
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP11:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;

View File

@ -33,8 +33,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
@ -127,15 +127,15 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -144,16 +144,16 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP5]], i32 1, <8 x i1> [[TMP0]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.body:
; ENABLED_MASKED_STRIDED-NEXT: [[IX_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1016, [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_09]], [[CONV]]
@ -168,7 +168,7 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED: for.inc:
; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_09]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !2
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -209,8 +209,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
@ -303,15 +303,15 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !2
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -320,17 +320,17 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -397,10 +397,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
@ -495,7 +495,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !3
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -508,10 +508,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -522,17 +522,17 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !5
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -604,10 +604,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
@ -702,7 +702,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !4
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -715,10 +715,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -729,17 +729,17 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -843,7 +843,7 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -856,13 +856,13 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -914,8 +914,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
@ -1008,7 +1008,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -1020,28 +1020,28 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -1099,8 +1099,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2(
; DISABLED_MASKED_STRIDED-NEXT: entry:
; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
@ -1418,15 +1418,15 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2(
; ENABLED_MASKED_STRIDED-NEXT: entry:
; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -1435,10 +1435,10 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]]
@ -1451,7 +1451,7 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !9
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -1524,10 +1524,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ]
@ -1847,7 +1847,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -1859,10 +1859,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -1873,10 +1873,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]]
@ -1889,7 +1889,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !10
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;
@ -1968,8 +1968,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
@ -2287,7 +2287,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !9
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]]
; DISABLED_MASKED_STRIDED: for.end:
; DISABLED_MASKED_STRIDED-NEXT: ret void
;
@ -2299,22 +2299,22 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i32 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]]
@ -2326,7 +2326,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca
; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !11
; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]]
; ENABLED_MASKED_STRIDED: for.end:
; ENABLED_MASKED_STRIDED-NEXT: ret void
;

View File

@ -460,8 +460,8 @@ for.end:
; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]]
; INTER-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>*
; INTER-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1
; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
; INTER-NEXT: [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]]
; INTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]]
; INTER-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*

View File

@ -14,8 +14,8 @@ define dso_local void @constTC(i32* noalias nocapture %A) optsize {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1>
; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 2, i32 3>
; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 4, i32 5>

View File

@ -18,8 +18,8 @@ define dso_local void @alignTC(i32* noalias nocapture %A, i32 %n) optsize {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]

View File

@ -8,8 +8,8 @@
define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: vector.ph:
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT: br label %vector.body
@ -62,8 +62,8 @@ exit:
; and not introduce traps on additional paths.
define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: vector.ph:
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT: br label %vector.body

View File

@ -15,15 +15,15 @@
; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float %fpinc, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: [[INDUCTION4:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP5]]
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, 4.000000e+00
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@ -37,15 +37,15 @@
; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: vector.ph:
; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> poison, float %fpinc, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[DOTSPLAT4]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: [[INDUCTION5:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP5]]
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, 4.000000e+00
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: br label %vector.body
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@ -115,8 +115,8 @@ for.end: ; preds = %for.end.loopexit, %
; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION2:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: vector.body:
@ -172,17 +172,17 @@ for.end: ; preds = %for.end.loopexit, %
; VEC4_INTERL1: for.body.lr.ph:
; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: [[INDUCTION7:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP7]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT12]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> poison, float [[TMP8]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT12]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]

View File

@ -50,8 +50,8 @@ define float @minloopattr(float* nocapture readonly %arg) #0 {
; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[T]], i32 0
; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i32 0
; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -66,7 +66,7 @@ define float @minloopattr(float* nocapture readonly %arg) #0 {
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP5]], [[RDX_SHUF]]
@ -90,7 +90,7 @@ define float @minloopattr(float* nocapture readonly %arg) #0 {
; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], !llvm.loop !2
; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
; CHECK: out:
; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[T6_LCSSA]]

View File

@ -451,8 +451,8 @@ define void @minimal_bit_widths(i1 %c) {
;
; VEC-LABEL: @minimal_bit_widths(
; VEC-NEXT: entry:
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> undef, i1 [[C:%.*]], i32 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> undef, <2 x i32> zeroinitializer
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]

View File

@ -15,15 +15,15 @@
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @int_inc, align 4
; CHECK: vector.ph:
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %init, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %vector.body
; CHECK: vector.body:
; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@ -86,15 +86,15 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK-LABEL: @induction_with_loop_inv(
; CHECK: vector.ph:
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 %j.012, i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %x.011, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 %j.012, i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 %j.012, 8
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %vector.body
; CHECK: vector.body:
; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@ -161,13 +161,13 @@ for.end6: ; preds = %for.end6.loopexit,
; CHECK-LABEL: @non_primary_iv_loop_inv_trunc(
; CHECK: vector.ph:
; CHECK: [[TMP3:%.*]] = trunc i64 %step to i32
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]]
; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> undef, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %vector.body
; CHECK: vector.body:
; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]

View File

@ -702,8 +702,8 @@ exit:
; CHECK-LABEL: @nonprimary(
; CHECK: vector.ph:
; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0
; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; CHECK: vector.body:
; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@ -721,8 +721,8 @@ exit:
;
; IND-LABEL: @nonprimary(
; IND: vector.ph:
; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; IND: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0
; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; IND: vector.body:
; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@ -739,8 +739,8 @@ exit:
;
; UNROLL-LABEL: @nonprimary(
; UNROLL: vector.ph:
; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0
; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; UNROLL: vector.body:
; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]

View File

@ -37,8 +37,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%class.Complex = type { float, float }

View File

@ -18,7 +18,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
;
; CHECK: vector.body:
; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
@ -67,7 +67,7 @@ for.end:
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
@ -126,7 +126,7 @@ for.end:
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
; CHECK: store i64 %x, {{.*}}
; CHECK: store i64 %x, {{.*}}
;

View File

@ -18,8 +18,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: @test_array_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: add nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@ -69,9 +69,9 @@ for.end: ; preds = %for.body
; CHECK-LABEL: @test_struct_array_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
@ -147,10 +147,10 @@ define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.ST4* [[S:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[STRIDED_VEC2]]
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC3]]
@ -274,9 +274,9 @@ for.body: ; preds = %for.body, %entry
; CHECK: %[[G1:.+]] = getelementptr inbounds i32, i32* %[[G0]], i64 -6
; CHECK: %[[B0:.+]] = bitcast i32* %[[G1]] to <8 x i32>*
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
@ -328,7 +328,7 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @even_load_static_tc(
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, 508
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
@ -371,7 +371,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, %n.vec
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
@ -416,7 +416,7 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @load_gap_reverse(
; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8
; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%pair = type { i64, i64 }
define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
@ -451,8 +451,8 @@ for.exit:
; CHECK-LABEL: @mixed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec
@ -495,9 +495,9 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @mixed_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
@ -552,8 +552,8 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @int_float_struct(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
; CHECK: add <4 x i32>
; CHECK: fadd fast <4 x float>
@ -670,7 +670,7 @@ for.end:
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6
; CHECK: store i32 %[[X4:.+]], {{.*}}
; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: add <4 x i32> %[[S1]], %[[Phi]]
define i32 @PR27626_1(%pair.i32 *%p, i64 %n) {
@ -771,7 +771,7 @@ for.end:
; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6
; CHECK: store i32 %[[X4:.+]], {{.*}}
; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: add <4 x i32> %[[S1]], %[[Phi]]
define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) {
@ -812,10 +812,10 @@ for.end:
; CHECK-LABEL: @PR27626_4(
; CHECK: vector.ph:
; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> undef, i32 %y, i32 0
; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> undef, i32 %z, i32 0
; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> poison, i32 %y, i32 0
; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> poison, i32 %z, i32 0
; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: store i32 %x, {{.*}}
; CHECK: store i32 %x, {{.*}}
@ -905,8 +905,8 @@ for.end:
; CHECK: vector.body:
; CHECK: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[VSHUF1:.+]], %vector.body ]
; CHECK: %wide.vec = load <8 x i16>
; CHECK: %[[VSHUF0:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %[[VSHUF0:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %[[VSHUF1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK: sext <4 x i16> %[[VSHUF0]] to <4 x i32>
; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32>

View File

@ -183,10 +183,10 @@ for.end: ; preds = %for.body
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@ -289,10 +289,10 @@ for.end: ; preds = %for.body
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> undef, i1 [[CMP]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[K]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i32 3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

View File

@ -55,8 +55,8 @@ define void @bottom_tested(i16* %p, i32 %n) {
; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]]
; TAILFOLD: vector.body:
; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]

View File

@ -11,8 +11,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Turn this into a max reduction. Make sure we use a splat to initialize the
; vector for the reduction.
; CHECK-LABEL: @max_red(
; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: %[[VAR:.*]] = insertelement <2 x i32> poison, i32 %max, i32 0
; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block

View File

@ -68,8 +68,8 @@ define void @Test(%struct.s* nocapture %obj, i64 %z) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4, !alias.scope !3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0

View File

@ -122,6 +122,30 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f1(
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.body.preheader:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.body:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB:%.*]], i64 [[INDVARS_IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC:%.*]], i64 [[INDVARS_IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[TMP1]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA:%.*]], i64 [[INDVARS_IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END]]
; CHECK-PROFITABLE-BY-DEFAULT: for.end:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void
;
entry:
%cmp1 = icmp sgt i32 %N, 0
br i1 %cmp1, label %for.body.preheader, label %for.end
@ -213,7 +237,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 -3
; CHECK-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP24]], align 4, !alias.scope !13
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP25:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 0
@ -249,7 +273,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 -3
; CHECK-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP38]], align 4
; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP39:%.*]] = fadd fast <4 x float> [[REVERSE17]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP30]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0
@ -287,6 +311,34 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f2(
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.body.preheader:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.body:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[I_014:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = xor i32 [[I_014]], -1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[SUB2:%.*]] = add i32 [[TMP1]], [[N]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[IDXPROM]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP2]], 1.000000e+00
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END]]
; CHECK-PROFITABLE-BY-DEFAULT: for.end:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret i32 0
;
entry:
%cmp1 = icmp sgt i32 %n, 1
br i1 %cmp1, label %for.body.preheader, label %for.end
@ -320,6 +372,69 @@ for.end: ; preds = %for.end.loopexit, %
}
define void @f3(i8* noalias %A, i64 %n) {
; CHECK-LABEL: @f3(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> <i8 1, i8 1, i8 1, i8 1>, <4 x i8>* [[TMP3]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP21:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> <i8 1, i8 1, i8 1, i8 1>, <4 x i8>* [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT5]] = add i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IV]]
; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP23:!llvm.loop !.*]]
; CHECK: for.end.loopexit.loopexit:
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f3(
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: iter.check:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2

View File

@ -229,8 +229,8 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[BSTRIDE:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]

View File

@ -15,8 +15,8 @@
;
; RUN: opt -S -loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
; CHECK-LABEL: vector.ph:
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]

View File

@ -16,10 +16,10 @@
; RUN: opt -S -loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
; CHECK: %[[ZeroTripChk:.*]] = icmp sgt i32 %jCount, 0
; CHECK-LABEL: vector.ph:
; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> undef, i1 %[[ZeroTripChk]], i32 0
; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> undef, <4 x i32> zeroinitializer
; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> poison, i1 %[[ZeroTripChk]], i32 0
; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]

View File

@ -6,15 +6,23 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
; Function Attrs: nofree norecurse nounwind
define void @a(i8* readnone %b) {
; CHECK-LABEL: @a(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = ptrtoint i8* [[B:%.*]] to i64
; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq i8* [[B]], null
; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0:%.*]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ null, %vector.ph ], [ [[PTR_IND:%.*]], %pred.store.continue7 ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue7 ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP2]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 0
@ -22,14 +30,64 @@ define void @a(i8* readnone %b) {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP5]], i32 -3
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 0
; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; CHECK: pred.store.if2:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 1
; CHECK-NEXT: store i8 95, i8* [[TMP13]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue3:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if4:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 2
; CHECK-NEXT: store i8 95, i8* [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue5:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.if6:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 3
; CHECK-NEXT: store i8 95, i8* [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue7:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[C_05:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[C_05]], i64 -1
; CHECK-NEXT: [[TMP19:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP19]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i8 95, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[B]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
;
entry:
%cmp.not4 = icmp eq i8* %b, null

View File

@ -34,8 +34,8 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read
; CHECK-NEXT: for.body.preheader:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

View File

@ -18,13 +18,13 @@ define i16 @test_true_and_false_branch_equal() {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1>
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> undef, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], <i16 32767, i16 32767>
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>

View File

@ -25,22 +25,22 @@ define void @test(i16 %x, i64 %y, i32* %ptr) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> undef, i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INC]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> <i64 0, i64 1>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1>
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
@ -58,10 +58,10 @@ define void @test(i16 %x, i64 %y, i32* %ptr) {
; CHECK-NEXT: [[OFFSET_IDX7:%.*]] = mul i64 [[INDEX]], [[INC]]
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[OFFSET_IDX7]] to i8
; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[INC]] to i8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i8> undef, i8 [[TMP9]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT8]], <2 x i8> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> undef, i8 [[TMP10]], i32 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT11]]
; CHECK-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[BROADCAST_SPLAT9]], [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i8 0, [[TMP10]]
@ -70,6 +70,34 @@ define void @test(i16 %x, i64 %y, i32* %ptr) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
; CHECK-NEXT: [[V2:%.*]] = trunc i64 [[IV]] to i8
; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1
; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]]
; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], [[LOOP2:!llvm.loop !.*]]
; CHECK: loop.exit:
; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]]
; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.2:
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[IV_NEXT_1:%.*]], [[LOOP_2]] ], [ 0, [[LOOP_EXIT]] ]
; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_1]], [[V1]]
; CHECK-NEXT: call void @use(i64 [[IV_NEXT_1]])
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_NEXT_1]], 200
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2]], label [[LOOP_2_EXIT:%.*]]
; CHECK: loop.2.exit:
; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%conv19 = sext i16 %x to i64

View File

@ -18,7 +18,7 @@
; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index
; CHECKUF1: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to <vscale x 4 x double>*
; CHECKUF1: %wide.load = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_CAST]], align 8, !alias.scope !0
; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
; CHECKUF1: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
; CHECKUF1: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0
@ -55,8 +55,8 @@
; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXB]], i64 %[[VSCALE2_EXT]]
; CHECKUF2: %[[IDXB_NEXT_CAST:.*]] = bitcast double* %[[IDXB_NEXT]] to <vscale x 4 x double>*
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_NEXT_CAST]], align 8, !alias.scope !0
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
; CHECKUF2: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0

View File

@ -19,19 +19,19 @@ define i32 @test(i64 %N, i32 %x) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[EXTRA_ITER]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 -1, i64 -2, i64 -3>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], <i32 10, i32 10, i32 10, i32 10>

View File

@ -35,8 +35,8 @@ for.end:
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, i64 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>*
; CHECK-NEXT: store <4 x i32*> [[DOTSPLAT]], <4 x i32*>* [[TMP3]], align 8

View File

@ -5,236 +5,236 @@
define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x double>* %C.Ptr) {
; CHECK-LABEL: @transpose_multiply(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double*
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3
; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST1]], align 8
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[VEC_GEP3]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST4]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double*
; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST6]], align 8
; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3
; CHECK-NEXT: [[VEC_CAST9:%.*]] = bitcast double* [[VEC_GEP8]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST9]], align 8
; CHECK-NEXT: [[VEC_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6
; CHECK-NEXT: [[VEC_CAST12:%.*]] = bitcast double* [[VEC_GEP11]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST12]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> undef, double [[TMP2]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> [[TMP5]], double [[TMP6]], i64 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> undef, double [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> [[TMP11]], double [[TMP12]], i64 2
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> undef, double [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
; CHECK-NEXT: [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]]
; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
; CHECK-NEXT: [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
; CHECK-NEXT: [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]]
; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
; CHECK-NEXT: [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
; CHECK-NEXT: [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]]
; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
; CHECK-NEXT: [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]]
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
; CHECK-NEXT: [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]]
; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
; CHECK-NEXT: [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
; CHECK-NEXT: [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]]
; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
; CHECK-NEXT: [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]]
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]]
; CHECK-NEXT: [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]]
; CHECK-NEXT: [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]]
; CHECK-NEXT: [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]]
; CHECK-NEXT: [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]]
; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]]
; CHECK-NEXT: [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]]
; CHECK-NEXT: [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]]
; CHECK-NEXT: [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]]
; CHECK-NEXT: [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]]
; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]]
; CHECK-NEXT: [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]]
; CHECK-NEXT: [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]]
; CHECK-NEXT: [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]]
; CHECK-NEXT: [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]]
; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]]
; CHECK-NEXT: [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]]
; CHECK-NEXT: [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]]
; CHECK-NEXT: [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]]
; CHECK-NEXT: [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]]
; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
; CHECK-NEXT: [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP49]], <3 x double>* [[VEC_CAST92]], align 8
; CHECK-NEXT: [[VEC_GEP93:%.*]] = getelementptr double, double* [[TMP110]], i64 3
; CHECK-NEXT: [[VEC_CAST94:%.*]] = bitcast double* [[VEC_GEP93]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP79]], <3 x double>* [[VEC_CAST94]], align 8
; CHECK-NEXT: [[VEC_GEP95:%.*]] = getelementptr double, double* [[TMP110]], i64 6
; CHECK-NEXT: [[VEC_CAST96:%.*]] = bitcast double* [[VEC_GEP95]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP109]], <3 x double>* [[VEC_CAST96]], align 8
; CHECK-NEXT: ret void
;
; Load columns of input matrixes %A and %B.
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double*
; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8
; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3
; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8
; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6
; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double*
; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8
; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3
; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8
; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6
; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8
; Transpose %A.
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2
; Lower multiply(transpose(%A), %B)
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]]
; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]]
; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]]
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]]
; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]]
; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]]
; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]]
; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]]
; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]]
; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]]
; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double [[TMP98]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]]
; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]]
; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> <i32 0, i32 1, i32 3>
; Store result columns.
; CHECK-NEXT: [[TMP108:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
; CHECK-NEXT: [[TMP109:%.*]] = bitcast double* [[TMP108]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP47]], <3 x double>* [[TMP109]], align 8
; CHECK-NEXT: [[TMP110:%.*]] = getelementptr double, double* [[TMP108]], i64 3
; CHECK-NEXT: [[TMP111:%.*]] = bitcast double* [[TMP110]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP77]], <3 x double>* [[TMP111]], align 8
; CHECK-NEXT: [[TMP112:%.*]] = getelementptr double, double* [[TMP108]], i64 6
; CHECK-NEXT: [[TMP113:%.*]] = bitcast double* [[TMP112]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP107]], <3 x double>* [[TMP113]], align 8
; CHECK-NEXT: ret void
;
entry:
%a = load <9 x double>, <9 x double>* %A.Ptr, align 8
@ -251,255 +251,256 @@ declare <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double>, <9 x
define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x double>* %C.Ptr) {
; CHECK-LABEL: @transpose_multiply_add(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double*
; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8
; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3
; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8
; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6
; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3
; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST1]], align 8
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[VEC_GEP3]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST4]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double*
; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8
; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3
; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8
; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6
; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8
; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST6]], align 8
; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3
; CHECK-NEXT: [[VEC_CAST9:%.*]] = bitcast double* [[VEC_GEP8]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST9]], align 8
; CHECK-NEXT: [[VEC_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6
; CHECK-NEXT: [[VEC_CAST12:%.*]] = bitcast double* [[VEC_GEP11]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST12]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> undef, double [[TMP2]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> [[TMP5]], double [[TMP6]], i64 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> undef, double [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> [[TMP11]], double [[TMP12]], i64 2
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> undef, double [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
; CHECK-NEXT: [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]]
; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
; CHECK-NEXT: [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
; CHECK-NEXT: [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]]
; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
; CHECK-NEXT: [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
; CHECK-NEXT: [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]]
; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
; CHECK-NEXT: [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]]
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
; CHECK-NEXT: [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]]
; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
; CHECK-NEXT: [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
; CHECK-NEXT: [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]]
; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
; CHECK-NEXT: [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]]
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]]
; CHECK-NEXT: [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]]
; CHECK-NEXT: [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]]
; CHECK-NEXT: [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]]
; CHECK-NEXT: [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]]
; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]]
; CHECK-NEXT: [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]]
; CHECK-NEXT: [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]]
; CHECK-NEXT: [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]]
; CHECK-NEXT: [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]]
; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]]
; CHECK-NEXT: [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]]
; CHECK-NEXT: [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]]
; CHECK-NEXT: [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]]
; CHECK-NEXT: [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]]
; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]]
; CHECK-NEXT: [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]]
; CHECK-NEXT: [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]]
; CHECK-NEXT: [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]]
; CHECK-NEXT: [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]]
; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
; CHECK-NEXT: [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST92]], align 8
; CHECK-NEXT: [[VEC_GEP94:%.*]] = getelementptr double, double* [[TMP110]], i64 3
; CHECK-NEXT: [[VEC_CAST95:%.*]] = bitcast double* [[VEC_GEP94]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD96:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST95]], align 8
; CHECK-NEXT: [[VEC_GEP97:%.*]] = getelementptr double, double* [[TMP110]], i64 6
; CHECK-NEXT: [[VEC_CAST98:%.*]] = bitcast double* [[VEC_GEP97]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST98]], align 8
; CHECK-NEXT: [[TMP111:%.*]] = fadd <3 x double> [[COL_LOAD93]], [[TMP49]]
; CHECK-NEXT: [[TMP112:%.*]] = fadd <3 x double> [[COL_LOAD96]], [[TMP79]]
; CHECK-NEXT: [[TMP113:%.*]] = fadd <3 x double> [[COL_LOAD99]], [[TMP109]]
; CHECK-NEXT: [[TMP114:%.*]] = bitcast <9 x double>* [[C_PTR]] to double*
; CHECK-NEXT: [[VEC_CAST100:%.*]] = bitcast double* [[TMP114]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP111]], <3 x double>* [[VEC_CAST100]], align 8
; CHECK-NEXT: [[VEC_GEP101:%.*]] = getelementptr double, double* [[TMP114]], i64 3
; CHECK-NEXT: [[VEC_CAST102:%.*]] = bitcast double* [[VEC_GEP101]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP112]], <3 x double>* [[VEC_CAST102]], align 8
; CHECK-NEXT: [[VEC_GEP103:%.*]] = getelementptr double, double* [[TMP114]], i64 6
; CHECK-NEXT: [[VEC_CAST104:%.*]] = bitcast double* [[VEC_GEP103]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP113]], <3 x double>* [[VEC_CAST104]], align 8
; CHECK-NEXT: ret void
;
; Transpose %A.
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2
; Lower multiply(transpose(%A), %B)
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]]
; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]]
; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]]
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]]
; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]]
; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]]
; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]]
; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]]
; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]]
; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]]
; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double [[TMP98]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]]
; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]]
; Embed result of multiply into flat vector.
; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> <i32 0, i32 1, i32 3>
; Load %C.
; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
; CHECK-NEXT: [[COL_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST92]], align 8
; CHECK-NEXT: [[COL_GEP94:%.*]] = getelementptr double, double* [[TMP110]], i64 3
; CHECK-NEXT: [[COL_CAST95:%.*]] = bitcast double* [[COL_GEP94]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD96:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST95]], align 8
; CHECK-NEXT: [[COL_GEP97:%.*]] = getelementptr double, double* [[TMP110]], i64 6
; CHECK-NEXT: [[COL_CAST98:%.*]] = bitcast double* [[COL_GEP97]] to <3 x double>*
; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST98]], align 8
; Add column vectors.
; CHECK-NEXT: [[TMP108:%.*]] = fadd <3 x double> [[COL_LOAD93]], [[TMP47]]
; CHECK-NEXT: [[TMP109:%.*]] = fadd <3 x double> [[COL_LOAD96]], [[TMP77]]
; CHECK-NEXT: [[TMP110:%.*]] = fadd <3 x double> [[COL_LOAD99]], [[TMP107]]
; Store result columns.
; CHECK-NEXT: [[TMP111:%.*]] = bitcast <9 x double>* [[C_PTR]] to double*
; CHECK-NEXT: [[TMP112:%.*]] = bitcast double* [[TMP111]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP108]], <3 x double>* [[TMP112]], align 8
; CHECK-NEXT: [[TMP113:%.*]] = getelementptr double, double* [[TMP111]], i64 3
; CHECK-NEXT: [[TMP114:%.*]] = bitcast double* [[TMP113]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP109]], <3 x double>* [[TMP114]], align 8
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr double, double* [[TMP111]], i64 6
; CHECK-NEXT: [[TMP116:%.*]] = bitcast double* [[TMP115]] to <3 x double>*
; CHECK-NEXT: store <3 x double> [[TMP110]], <3 x double>* [[TMP116]], align 8
; CHECK-NEXT: ret void
;
entry:
%a = load <9 x double>, <9 x double>* %A.Ptr, align 8
%b = load <9 x double>, <9 x double>* %B.Ptr, align 8

View File

@ -17,52 +17,52 @@ define void @test(i32 %r, i32 %c) {
; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i32 0, i32 0, i64 2) to <2 x double>*), align 8
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>

View File

@ -45,76 +45,76 @@ define void @multiply_sub_add_2x3_3x2(<6 x double>* %a.ptr, <6 x double>* %b.ptr
; RM-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST18]], align 8
; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP9:%.*]] = extractelement <3 x double> [[TMP2]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP11:%.*]] = extractelement <3 x double> [[TMP2]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP11]], i32 0
; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP11]], i32 0
; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP12:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
; RM-NEXT: [[TMP13:%.*]] = fadd <1 x double> [[TMP10]], [[TMP12]]
; RM-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[TMP2]], i64 2
; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
; RM-NEXT: [[TMP16:%.*]] = fadd <1 x double> [[TMP13]], [[TMP15]]
; RM-NEXT: [[TMP17:%.*]] = shufflevector <1 x double> [[TMP16]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP17]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP19:%.*]] = extractelement <3 x double> [[TMP2]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> undef, double [[TMP19]], i32 0
; RM-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP19]], i32 0
; RM-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP20:%.*]] = fmul <1 x double> [[SPLAT_SPLAT27]], [[BLOCK25]]
; RM-NEXT: [[BLOCK28:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP21:%.*]] = extractelement <3 x double> [[TMP2]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT30]], [[BLOCK28]]
; RM-NEXT: [[TMP23:%.*]] = fadd <1 x double> [[TMP20]], [[TMP22]]
; RM-NEXT: [[BLOCK31:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP24:%.*]] = extractelement <3 x double> [[TMP2]], i64 2
; RM-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> undef, double [[TMP24]], i32 0
; RM-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0
; RM-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP25:%.*]] = fmul <1 x double> [[SPLAT_SPLAT33]], [[BLOCK31]]
; RM-NEXT: [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]]
; RM-NEXT: [[TMP27:%.*]] = shufflevector <1 x double> [[TMP26]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP18]], <2 x double> [[TMP27]], <2 x i32> <i32 0, i32 2>
; RM-NEXT: [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP29:%.*]] = extractelement <3 x double> [[TMP3]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> undef, double [[TMP29]], i32 0
; RM-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP29]], i32 0
; RM-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP30:%.*]] = fmul <1 x double> [[SPLAT_SPLAT36]], [[BLOCK34]]
; RM-NEXT: [[BLOCK37:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP31:%.*]] = extractelement <3 x double> [[TMP3]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> undef, double [[TMP31]], i32 0
; RM-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP31]], i32 0
; RM-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP32:%.*]] = fmul <1 x double> [[SPLAT_SPLAT39]], [[BLOCK37]]
; RM-NEXT: [[TMP33:%.*]] = fadd <1 x double> [[TMP30]], [[TMP32]]
; RM-NEXT: [[BLOCK40:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP34:%.*]] = extractelement <3 x double> [[TMP3]], i64 2
; RM-NEXT: [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> undef, double [[TMP34]], i32 0
; RM-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
; RM-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP35:%.*]] = fmul <1 x double> [[SPLAT_SPLAT42]], [[BLOCK40]]
; RM-NEXT: [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
; RM-NEXT: [[TMP37:%.*]] = shufflevector <1 x double> [[TMP36]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP38:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP37]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK43:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP39:%.*]] = extractelement <3 x double> [[TMP3]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x double> undef, double [[TMP39]], i32 0
; RM-NEXT: [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT44]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x double> poison, double [[TMP39]], i32 0
; RM-NEXT: [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT44]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP40:%.*]] = fmul <1 x double> [[SPLAT_SPLAT45]], [[BLOCK43]]
; RM-NEXT: [[BLOCK46:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP41:%.*]] = extractelement <3 x double> [[TMP3]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT47:%.*]] = insertelement <1 x double> undef, double [[TMP41]], i32 0
; RM-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT47]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT47:%.*]] = insertelement <1 x double> poison, double [[TMP41]], i32 0
; RM-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT47]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP42:%.*]] = fmul <1 x double> [[SPLAT_SPLAT48]], [[BLOCK46]]
; RM-NEXT: [[TMP43:%.*]] = fadd <1 x double> [[TMP40]], [[TMP42]]
; RM-NEXT: [[BLOCK49:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP44:%.*]] = extractelement <3 x double> [[TMP3]], i64 2
; RM-NEXT: [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0
; RM-NEXT: [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
; RM-NEXT: [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT51]], [[BLOCK49]]
; RM-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>

View File

@ -12,49 +12,49 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> <i32 0, i32 2>

View File

@ -12,49 +12,49 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> <i32 0, i32 2>

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --verbose
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -lower-matrix-intrinsics -matrix-default-layout=row-major -S < %s | FileCheck --check-prefix=RM %s
@ -13,52 +13,52 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
; RM-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
; RM-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]]
; RM-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0
; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT9]], [[BLOCK7]]
; RM-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]]
; RM-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
; RM-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]]
; RM-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0
; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]]
; RM-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
; RM-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
; RM-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
@ -74,7 +74,6 @@ entry:
declare <4 x double> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x double>, <4 x double>, i32, i32, i32)
define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
; RM-LABEL: @multiply_1x2(
; RM-NEXT: entry:
; RM-NEXT: [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <1 x i32> zeroinitializer
@ -82,29 +81,29 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
; RM-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
; RM-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> undef, double [[TMP4]], i32 0
; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[SPLAT_SPLAT5]], [[BLOCK3]]
; RM-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
; RM-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0
; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[SPLAT_SPLAT8]], [[BLOCK6]]
; RM-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
; RM-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0
; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[SPLAT_SPLAT11]], [[BLOCK9]]
; RM-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; RM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> <i32 0, i32 2>
@ -128,117 +127,117 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
; RM-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
; RM-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
; RM-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT7]], [[BLOCK5]]
; RM-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
; RM-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0
; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT10]], [[BLOCK8]]
; RM-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT13]], [[BLOCK11]]
; RM-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
; RM-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT16]], [[BLOCK14]]
; RM-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0
; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT19]], [[BLOCK17]]
; RM-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
; RM-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT22]], [[BLOCK20]]
; RM-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT25]], [[BLOCK23]]
; RM-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
; RM-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0
; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[SPLAT_SPLAT28]], [[BLOCK26]]
; RM-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0
; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT31]], [[BLOCK29]]
; RM-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
; RM-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> undef, double [[TMP35]], i32 0
; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[SPLAT_SPLAT34]], [[BLOCK32]]
; RM-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> undef, double [[TMP37]], i32 0
; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT37]], [[BLOCK35]]
; RM-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
; RM-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
; RM-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> undef, double [[TMP42]], i32 0
; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[SPLAT_SPLAT40]], [[BLOCK38]]
; RM-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0
; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT43]], [[BLOCK41]]
; RM-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
; RM-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> undef, double [[TMP49]], i32 0
; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP50:%.*]] = fmul <1 x double> [[SPLAT_SPLAT46]], [[BLOCK44]]
; RM-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
; RM-NEXT: [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> undef, double [[TMP51]], i32 0
; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[SPLAT_SPLAT49]], [[BLOCK47]]
; RM-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
; RM-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; RM-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
; RM-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> undef, double [[TMP56]], i32 0
; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP57:%.*]] = fmul <1 x double> [[SPLAT_SPLAT52]], [[BLOCK50]]
; RM-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
; RM-NEXT: [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0
; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> undef, <1 x i32> zeroinitializer
; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
; RM-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[SPLAT_SPLAT55]], [[BLOCK53]]
; RM-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]
; RM-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>

View File

@ -12,52 +12,52 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]]
; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
@ -80,29 +80,29 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> undef, double [[TMP4]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> <i32 0, i32 2>
@ -126,117 +126,117 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <2 x i32> <i32 4, i32 5>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> undef, double [[TMP35]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> undef, double [[TMP37]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> undef, double [[TMP42]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> undef, double [[TMP49]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> undef, double [[TMP51]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> undef, double [[TMP56]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP57:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]
; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>

View File

@ -12,49 +12,49 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> undef, float [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> undef, float [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]])
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> undef, float [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> undef, float [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]])
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> undef, float [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> undef, float [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> undef, <1 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0
; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]])
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> <i32 0, i32 2>

Some files were not shown because too many files have changed in this diff Show More