diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c index dcf2a046927f..7fd64ed82b15 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c @@ -78,14 +78,14 @@ void test_core(void) { // CHECK-ASM: vst vd = vec_splat(vd, 0); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vd = vec_splat(vd, 1); // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK-ASM: vrepg vd = vec_splats(d); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vd = vec_mergeh(vd, vd); diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index 4c7254faedcf..8dde5334d6a4 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -729,79 +729,79 @@ void test_core(void) { // CHECK: <2 x i64> vsc = vec_splat(vsc, 0); - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-ASM: vrepb vsc = vec_splat(vsc, 15); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> // CHECK-ASM: vrepb vuc = vec_splat(vuc, 0); - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-ASM: vrepb vuc = vec_splat(vuc, 15); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> // CHECK-ASM: vrepb vbc = vec_splat(vbc, 0); - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-ASM: vrepb vbc = vec_splat(vbc, 15); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> // CHECK-ASM: vrepb vss = vec_splat(vss, 0); - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-ASM: vreph vss = vec_splat(vss, 7); // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK-ASM: vreph vus = vec_splat(vus, 0); - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-ASM: vreph vus = vec_splat(vus, 7); // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK-ASM: vreph vbs = vec_splat(vbs, 0); - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-ASM: vreph vbs = vec_splat(vbs, 7); // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK-ASM: vreph vsi = vec_splat(vsi, 0); - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-ASM: vrepf vsi = vec_splat(vsi, 3); // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> // CHECK-ASM: vrepf vui = vec_splat(vui, 0); - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-ASM: vrepf vui = vec_splat(vui, 3); // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> // CHECK-ASM: vrepf vbi = vec_splat(vbi, 0); - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-ASM: vrepf vbi = vec_splat(vbi, 3); // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> // CHECK-ASM: vrepf vsl = vec_splat(vsl, 0); - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vsl = vec_splat(vsl, 1); // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> // CHECK-ASM: vrepg vul = vec_splat(vul, 0); - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vul = vec_splat(vul, 1); // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> // CHECK-ASM: vrepg vbl = vec_splat(vbl, 0); - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vbl = vec_splat(vbl, 1); // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> // CHECK-ASM: vrepg vd = vec_splat(vd, 0); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vd = vec_splat(vd, 1); // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> @@ -841,31 +841,31 @@ void test_core(void) { // CHECK: <2 x i64> vsc = vec_splats(sc); - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-ASM: vlrepb vuc = vec_splats(uc); - // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-ASM: vlrepb vss = vec_splats(ss); - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-ASM: vlreph vus = vec_splats(us); - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-ASM: vlreph vsi = vec_splats(si); - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-ASM: vlrepf vui = vec_splats(ui); - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-ASM: vlrepf vsl = vec_splats(sl); - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vul = vec_splats(ul); - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vd = vec_splats(d); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vsl = vec_extend_s64(vsc); diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c index 4b928ced71c0..31b0f965f383 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c @@ -130,23 +130,23 @@ void test_core(void) { // CHECK-ASM: vst vf = vec_splat(vf, 0); - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer // CHECK-ASM: vrepf vf = vec_splat(vf, 1); // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK-ASM: vrepf vd = vec_splat(vd, 0); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vd = vec_splat(vd, 1); // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK-ASM: vrepg vf = vec_splats(f); - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer // CHECK-ASM: vlrepf vd = vec_splats(d); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vf = vec_mergeh(vf, vf); diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c index a1b580f8c56f..23332f03450c 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c @@ -235,23 +235,23 @@ void test_core(void) { // CHECK-ASM: vstrlr vf = vec_splat(vf, 0); - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer // CHECK-ASM: vrepf vf = vec_splat(vf, 1); // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK-ASM: vrepf vd = vec_splat(vd, 0); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vrepg vd = vec_splat(vd, 1); // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK-ASM: vrepg vf = vec_splats(f); - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer // CHECK-ASM: vlrepf vd = vec_splats(d); - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer // CHECK-ASM: vlrepg vf = vec_mergeh(vf, vf); diff --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c index 58fb60e9187f..ff7720ba436c 100644 --- a/clang/test/CodeGen/SystemZ/zvector.c +++ b/clang/test/CodeGen/SystemZ/zvector.c @@ -1924,8 +1924,8 @@ void test_xor_assign(void) { // CHECK: store volatile <16 x i8> [[SHL1]], <16 x i8>* @sc, align 8 // CHECK: [[TMP4:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8 // CHECK: [[TMP5:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0 -// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0 +// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8> // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHL2]], <16 x i8>* @sc, align 8 @@ -1942,8 +1942,8 @@ void test_xor_assign(void) { // CHECK: store volatile <16 x i8> [[SHL5]], <16 x i8>* @uc, align 8 // CHECK: [[TMP11:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8 // CHECK: [[TMP12:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP12]], i32 0 -// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0 +// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8> // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHL9]], <16 x i8>* @uc, align 8 @@ -1960,8 +1960,8 @@ void test_xor_assign(void) { // CHECK: store volatile <8 x i16> [[SHL12]], <8 x i16>* @ss, align 8 // CHECK: [[TMP18:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8 // CHECK: [[TMP19:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 -// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0 +// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16> // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHL16]], <8 x i16>* @ss, align 8 @@ -1978,8 +1978,8 @@ void test_xor_assign(void) { // CHECK: store volatile <8 x i16> [[SHL19]], <8 x i16>* @us, align 8 // CHECK: [[TMP25:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8 // CHECK: [[TMP26:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP26]], i32 0 -// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0 +// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16> // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHL23]], <8 x i16>* @us, align 8 @@ -1996,8 +1996,8 @@ void test_xor_assign(void) { // CHECK: store volatile <4 x i32> [[SHL26]], <4 x i32>* @si, align 8 // CHECK: [[TMP32:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 // CHECK: [[TMP33:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP33]], i32 0 -// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0 +// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], <4 x i32>* @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 @@ -2013,8 +2013,8 @@ void test_xor_assign(void) { // CHECK: store volatile <4 x i32> [[SHL32]], <4 x i32>* @ui, align 8 // CHECK: [[TMP39:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 // CHECK: [[TMP40:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP40]], i32 0 -// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0 +// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], <4 x i32>* @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 @@ -2030,8 +2030,8 @@ void test_xor_assign(void) { // CHECK: store volatile <2 x i64> [[SHL38]], <2 x i64>* @sl, align 8 // CHECK: [[TMP46:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8 // CHECK: [[TMP47:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i32 0 -// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0 +// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64> // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHL42]], <2 x i64>* @sl, align 8 @@ -2048,8 +2048,8 @@ void test_xor_assign(void) { // CHECK: store volatile <2 x i64> [[SHL45]], <2 x i64>* @ul, align 8 // CHECK: [[TMP53:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8 // CHECK: [[TMP54:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP54]], i32 0 -// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0 +// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64> // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHL49]], <2 x i64>* @ul, align 8 @@ -2106,8 +2106,8 @@ void test_sl(void) { // CHECK: [[SHL1:%.*]] = shl <16 x i8> [[TMP3]], [[TMP2]] // CHECK: store volatile <16 x i8> [[SHL1]], <16 x i8>* @sc, align 8 // CHECK: [[TMP4:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP4]], i32 0 -// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0 +// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[TMP5:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8 // CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8> // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP5]], [[SH_PROM]] @@ -2124,8 +2124,8 @@ void test_sl(void) { // CHECK: [[SHL5:%.*]] = shl <16 x i8> [[TMP10]], [[TMP9]] // CHECK: store volatile <16 x i8> [[SHL5]], <16 x i8>* @uc, align 8 // CHECK: [[TMP11:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP11]], i32 0 -// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0 +// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[TMP12:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8 // CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8> // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP12]], [[SH_PROM8]] @@ -2142,8 +2142,8 @@ void test_sl(void) { // CHECK: [[SHL12:%.*]] = shl <8 x i16> [[TMP17]], [[TMP16]] // CHECK: store volatile <8 x i16> [[SHL12]], <8 x i16>* @ss, align 8 // CHECK: [[TMP18:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP18]], i32 0 -// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0 +// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[TMP19:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8 // CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16> // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP19]], [[SH_PROM15]] @@ -2160,8 +2160,8 @@ void test_sl(void) { // CHECK: [[SHL19:%.*]] = shl <8 x i16> [[TMP24]], [[TMP23]] // CHECK: store volatile <8 x i16> [[SHL19]], <8 x i16>* @us, align 8 // CHECK: [[TMP25:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP25]], i32 0 -// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0 +// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[TMP26:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8 // CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16> // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP26]], [[SH_PROM22]] @@ -2178,8 +2178,8 @@ void test_sl(void) { // CHECK: [[SHL26:%.*]] = shl <4 x i32> [[TMP31]], [[TMP30]] // CHECK: store volatile <4 x i32> [[SHL26]], <4 x i32>* @si, align 8 // CHECK: [[TMP32:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP32]], i32 0 -// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0 +// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[TMP33:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], <4 x i32>* @si, align 8 @@ -2195,8 +2195,8 @@ void test_sl(void) { // CHECK: [[SHL32:%.*]] = shl <4 x i32> [[TMP38]], [[TMP37]] // CHECK: store volatile <4 x i32> [[SHL32]], <4 x i32>* @ui, align 8 // CHECK: [[TMP39:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i32 0 -// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0 +// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[TMP40:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], <4 x i32>* @ui, align 8 @@ -2212,8 +2212,8 @@ void test_sl(void) { // CHECK: [[SHL38:%.*]] = shl <2 x i64> [[TMP45]], [[TMP44]] // CHECK: store volatile <2 x i64> [[SHL38]], <2 x i64>* @sl, align 8 // CHECK: [[TMP46:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP46]], i32 0 -// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0 +// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[TMP47:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8 // CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64> // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP47]], [[SH_PROM41]] @@ -2230,8 +2230,8 @@ void test_sl(void) { // CHECK: [[SHL45:%.*]] = shl <2 x i64> [[TMP52]], [[TMP51]] // CHECK: store volatile <2 x i64> [[SHL45]], <2 x i64>* @ul, align 8 // CHECK: [[TMP53:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP53]], i32 0 -// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0 +// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[TMP54:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8 // CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64> // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP54]], [[SH_PROM48]] @@ -2290,8 +2290,8 @@ void test_sl_assign(void) { // CHECK: store volatile <16 x i8> [[SHR1]], <16 x i8>* @sc, align 8 // CHECK: [[TMP4:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8 // CHECK: [[TMP5:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0 -// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0 +// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8> // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHR2]], <16 x i8>* @sc, align 8 @@ -2308,8 +2308,8 @@ void test_sl_assign(void) { // CHECK: store volatile <16 x i8> [[SHR5]], <16 x i8>* @uc, align 8 // CHECK: [[TMP11:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8 // CHECK: [[TMP12:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP12]], i32 0 -// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0 +// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8> // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHR9]], <16 x i8>* @uc, align 8 @@ -2326,8 +2326,8 @@ void test_sl_assign(void) { // CHECK: store volatile <8 x i16> [[SHR12]], <8 x i16>* @ss, align 8 // CHECK: [[TMP18:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8 // CHECK: [[TMP19:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 -// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0 +// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16> // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHR16]], <8 x i16>* @ss, align 8 @@ -2344,8 +2344,8 @@ void test_sl_assign(void) { // CHECK: store volatile <8 x i16> [[SHR19]], <8 x i16>* @us, align 8 // CHECK: [[TMP25:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8 // CHECK: [[TMP26:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP26]], i32 0 -// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0 +// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16> // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHR23]], <8 x i16>* @us, align 8 @@ -2362,8 +2362,8 @@ void test_sl_assign(void) { // CHECK: store volatile <4 x i32> [[SHR26]], <4 x i32>* @si, align 8 // CHECK: [[TMP32:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 // CHECK: [[TMP33:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP33]], i32 0 -// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0 +// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], <4 x i32>* @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 @@ -2379,8 +2379,8 @@ void test_sl_assign(void) { // CHECK: store volatile <4 x i32> [[SHR32]], <4 x i32>* @ui, align 8 // CHECK: [[TMP39:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 // CHECK: [[TMP40:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP40]], i32 0 -// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0 +// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], <4 x i32>* @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 @@ -2396,8 +2396,8 @@ void test_sl_assign(void) { // CHECK: store volatile <2 x i64> [[SHR38]], <2 x i64>* @sl, align 8 // CHECK: [[TMP46:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8 // CHECK: [[TMP47:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i32 0 -// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0 +// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64> // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHR42]], <2 x i64>* @sl, align 8 @@ -2414,8 +2414,8 @@ void test_sl_assign(void) { // CHECK: store volatile <2 x i64> [[SHR45]], <2 x i64>* @ul, align 8 // CHECK: [[TMP53:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8 // CHECK: [[TMP54:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP54]], i32 0 -// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0 +// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64> // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHR49]], <2 x i64>* @ul, align 8 @@ -2472,8 +2472,8 @@ void test_sr(void) { // CHECK: [[SHR1:%.*]] = ashr <16 x i8> [[TMP3]], [[TMP2]] // CHECK: store volatile <16 x i8> [[SHR1]], <16 x i8>* @sc, align 8 // CHECK: [[TMP4:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP4]], i32 0 -// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0 +// CHECK: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[TMP5:%.*]] = load volatile <16 x i8>, <16 x i8>* @sc, align 8 // CHECK: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8> // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP5]], [[SH_PROM]] @@ -2490,8 +2490,8 @@ void test_sr(void) { // CHECK: [[SHR5:%.*]] = lshr <16 x i8> [[TMP10]], [[TMP9]] // CHECK: store volatile <16 x i8> [[SHR5]], <16 x i8>* @uc, align 8 // CHECK: [[TMP11:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[TMP11]], i32 0 -// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0 +// CHECK: [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer // CHECK: [[TMP12:%.*]] = load volatile <16 x i8>, <16 x i8>* @uc, align 8 // CHECK: [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8> // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP12]], [[SH_PROM8]] @@ -2508,8 +2508,8 @@ void test_sr(void) { // CHECK: [[SHR12:%.*]] = ashr <8 x i16> [[TMP17]], [[TMP16]] // CHECK: store volatile <8 x i16> [[SHR12]], <8 x i16>* @ss, align 8 // CHECK: [[TMP18:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> undef, i32 [[TMP18]], i32 0 -// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0 +// CHECK: [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[TMP19:%.*]] = load volatile <8 x i16>, <8 x i16>* @ss, align 8 // CHECK: [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16> // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP19]], [[SH_PROM15]] @@ -2526,8 +2526,8 @@ void test_sr(void) { // CHECK: [[SHR19:%.*]] = lshr <8 x i16> [[TMP24]], [[TMP23]] // CHECK: store volatile <8 x i16> [[SHR19]], <8 x i16>* @us, align 8 // CHECK: [[TMP25:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> undef, i32 [[TMP25]], i32 0 -// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0 +// CHECK: [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[TMP26:%.*]] = load volatile <8 x i16>, <8 x i16>* @us, align 8 // CHECK: [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16> // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP26]], [[SH_PROM22]] @@ -2544,8 +2544,8 @@ void test_sr(void) { // CHECK: [[SHR26:%.*]] = ashr <4 x i32> [[TMP31]], [[TMP30]] // CHECK: store volatile <4 x i32> [[SHR26]], <4 x i32>* @si, align 8 // CHECK: [[TMP32:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TMP32]], i32 0 -// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0 +// CHECK: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[TMP33:%.*]] = load volatile <4 x i32>, <4 x i32>* @si, align 8 // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], <4 x i32>* @si, align 8 @@ -2561,8 +2561,8 @@ void test_sr(void) { // CHECK: [[SHR32:%.*]] = lshr <4 x i32> [[TMP38]], [[TMP37]] // CHECK: store volatile <4 x i32> [[SHR32]], <4 x i32>* @ui, align 8 // CHECK: [[TMP39:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i32 0 -// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0 +// CHECK: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[TMP40:%.*]] = load volatile <4 x i32>, <4 x i32>* @ui, align 8 // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], <4 x i32>* @ui, align 8 @@ -2578,8 +2578,8 @@ void test_sr(void) { // CHECK: [[SHR38:%.*]] = ashr <2 x i64> [[TMP45]], [[TMP44]] // CHECK: store volatile <2 x i64> [[SHR38]], <2 x i64>* @sl, align 8 // CHECK: [[TMP46:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> undef, i32 [[TMP46]], i32 0 -// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0 +// CHECK: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[TMP47:%.*]] = load volatile <2 x i64>, <2 x i64>* @sl, align 8 // CHECK: [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64> // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP47]], [[SH_PROM41]] @@ -2596,8 +2596,8 @@ void test_sr(void) { // CHECK: [[SHR45:%.*]] = lshr <2 x i64> [[TMP52]], [[TMP51]] // CHECK: store volatile <2 x i64> [[SHR45]], <2 x i64>* @ul, align 8 // CHECK: [[TMP53:%.*]] = load volatile i32, i32* @cnt, align 4 -// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> undef, i32 [[TMP53]], i32 0 -// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> undef, <2 x i32> zeroinitializer +// CHECK: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0 +// CHECK: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: [[TMP54:%.*]] = load volatile <2 x i64>, <2 x i64>* @ul, align 8 // CHECK: [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64> // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP54]], [[SH_PROM48]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c index f2cdc7dc6b29..efe5a56f7efb 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c @@ -134,8 +134,8 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vcmpeqq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -152,8 +152,8 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpeqq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -170,8 +170,8 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpeqq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -188,8 +188,8 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpeqq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -206,8 +206,8 @@ mve_pred16_t test_vcmpeqq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpeqq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -224,8 +224,8 @@ mve_pred16_t test_vcmpeqq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vcmpeqq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -242,8 +242,8 @@ mve_pred16_t test_vcmpeqq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vcmpeqq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -260,8 +260,8 @@ mve_pred16_t test_vcmpeqq_n_u16(uint16x8_t a, uint16_t b) // CHECK-LABEL: @test_vcmpeqq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -432,8 +432,8 @@ mve_pred16_t test_vcmpeqq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -453,8 +453,8 @@ mve_pred16_t test_vcmpeqq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -474,8 +474,8 @@ mve_pred16_t test_vcmpeqq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -495,8 +495,8 @@ mve_pred16_t test_vcmpeqq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -516,8 +516,8 @@ mve_pred16_t test_vcmpeqq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -537,8 +537,8 @@ mve_pred16_t test_vcmpeqq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -558,8 +558,8 @@ mve_pred16_t test_vcmpeqq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -579,8 +579,8 @@ mve_pred16_t test_vcmpeqq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -726,8 +726,8 @@ mve_pred16_t test_vcmpneq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vcmpneq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -744,8 +744,8 @@ mve_pred16_t test_vcmpneq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpneq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -762,8 +762,8 @@ mve_pred16_t test_vcmpneq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpneq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -780,8 +780,8 @@ mve_pred16_t test_vcmpneq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpneq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -798,8 +798,8 @@ mve_pred16_t test_vcmpneq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpneq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -816,8 +816,8 @@ mve_pred16_t test_vcmpneq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vcmpneq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -834,8 +834,8 @@ mve_pred16_t test_vcmpneq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vcmpneq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -852,8 +852,8 @@ mve_pred16_t test_vcmpneq_n_u16(uint16x8_t a, uint16_t b) // CHECK-LABEL: @test_vcmpneq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1024,8 +1024,8 @@ mve_pred16_t test_vcmpneq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1045,8 +1045,8 @@ mve_pred16_t test_vcmpneq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1066,8 +1066,8 @@ mve_pred16_t test_vcmpneq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -1087,8 +1087,8 @@ mve_pred16_t test_vcmpneq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1108,8 +1108,8 @@ mve_pred16_t test_vcmpneq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1129,8 +1129,8 @@ mve_pred16_t test_vcmpneq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -1150,8 +1150,8 @@ mve_pred16_t test_vcmpneq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1171,8 +1171,8 @@ mve_pred16_t test_vcmpneq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1318,8 +1318,8 @@ mve_pred16_t test_vcmpcsq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vcmpgeq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1336,8 +1336,8 @@ mve_pred16_t test_vcmpgeq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpgeq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1354,8 +1354,8 @@ mve_pred16_t test_vcmpgeq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpgeq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1372,8 +1372,8 @@ mve_pred16_t test_vcmpgeq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpgeq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1390,8 +1390,8 @@ mve_pred16_t test_vcmpgeq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpgeq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1408,8 +1408,8 @@ mve_pred16_t test_vcmpgeq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vcmpcsq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1426,8 +1426,8 @@ mve_pred16_t test_vcmpcsq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vcmpcsq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1444,8 +1444,8 @@ mve_pred16_t test_vcmpcsq_n_u16(uint16x8_t a, uint16_t b) // CHECK-LABEL: @test_vcmpcsq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1616,8 +1616,8 @@ mve_pred16_t test_vcmpcsq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1637,8 +1637,8 @@ mve_pred16_t test_vcmpgeq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1658,8 +1658,8 @@ mve_pred16_t test_vcmpgeq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -1679,8 +1679,8 @@ mve_pred16_t test_vcmpgeq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1700,8 +1700,8 @@ mve_pred16_t test_vcmpgeq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1721,8 +1721,8 @@ mve_pred16_t test_vcmpgeq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -1742,8 +1742,8 @@ mve_pred16_t test_vcmpcsq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -1763,8 +1763,8 @@ mve_pred16_t test_vcmpcsq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -1910,8 +1910,8 @@ mve_pred16_t test_vcmphiq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vcmpgtq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1928,8 +1928,8 @@ mve_pred16_t test_vcmpgtq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpgtq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1946,8 +1946,8 @@ mve_pred16_t test_vcmpgtq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpgtq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1964,8 +1964,8 @@ mve_pred16_t test_vcmpgtq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpgtq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -1982,8 +1982,8 @@ mve_pred16_t test_vcmpgtq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpgtq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2000,8 +2000,8 @@ mve_pred16_t test_vcmpgtq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vcmphiq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2018,8 +2018,8 @@ mve_pred16_t test_vcmphiq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vcmphiq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2036,8 +2036,8 @@ mve_pred16_t test_vcmphiq_n_u16(uint16x8_t a, uint16_t b) // CHECK-LABEL: @test_vcmphiq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2208,8 +2208,8 @@ mve_pred16_t test_vcmphiq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -2229,8 +2229,8 @@ mve_pred16_t test_vcmpgtq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -2250,8 +2250,8 @@ mve_pred16_t test_vcmpgtq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -2271,8 +2271,8 @@ mve_pred16_t test_vcmpgtq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -2292,8 +2292,8 @@ mve_pred16_t test_vcmpgtq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -2313,8 +2313,8 @@ mve_pred16_t test_vcmpgtq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -2334,8 +2334,8 @@ mve_pred16_t test_vcmphiq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -2355,8 +2355,8 @@ mve_pred16_t test_vcmphiq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -2454,8 +2454,8 @@ mve_pred16_t test_vcmpleq_s32(int32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vcmpleq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2472,8 +2472,8 @@ mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpleq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2490,8 +2490,8 @@ mve_pred16_t test_vcmpleq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpleq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2508,8 +2508,8 @@ mve_pred16_t test_vcmpleq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpleq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2526,8 +2526,8 @@ mve_pred16_t test_vcmpleq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpleq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2641,8 +2641,8 @@ mve_pred16_t test_vcmpleq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -2662,8 +2662,8 @@ mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -2683,8 +2683,8 @@ mve_pred16_t test_vcmpleq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -2704,8 +2704,8 @@ mve_pred16_t test_vcmpleq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -2725,8 +2725,8 @@ mve_pred16_t test_vcmpleq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -2824,8 +2824,8 @@ mve_pred16_t test_vcmpltq_s32(int32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vcmpltq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2842,8 +2842,8 @@ mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vcmpltq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2860,8 +2860,8 @@ mve_pred16_t test_vcmpltq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vcmpltq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2878,8 +2878,8 @@ mve_pred16_t test_vcmpltq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vcmpltq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -2896,8 +2896,8 @@ mve_pred16_t test_vcmpltq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vcmpltq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 @@ -3011,8 +3011,8 @@ mve_pred16_t test_vcmpltq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -3032,8 +3032,8 @@ mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) @@ -3053,8 +3053,8 @@ mve_pred16_t test_vcmpltq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) @@ -3074,8 +3074,8 @@ mve_pred16_t test_vcmpltq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) @@ -3095,8 +3095,8 @@ mve_pred16_t test_vcmpltq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp index c43b8ea9bc57..59604c58b740 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp +++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp @@ -78,8 +78,8 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 diff --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c b/clang/test/CodeGen/arm-mve-intrinsics/dup.c index b443917cb258..f3c694a9b03e 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/dup.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c @@ -6,8 +6,8 @@ // CHECK-LABEL: @test_vdupq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: ret <8 x half> [[DOTSPLAT]] // float16x8_t test_vdupq_n_f16(float16_t a) @@ -17,8 +17,8 @@ float16x8_t test_vdupq_n_f16(float16_t a) // CHECK-LABEL: @test_vdupq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x float> [[DOTSPLAT]] // float32x4_t test_vdupq_n_f32(float32_t a) @@ -28,8 +28,8 @@ float32x4_t test_vdupq_n_f32(float32_t a) // CHECK-LABEL: @test_vdupq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]] // int8x16_t test_vdupq_n_s8(int8_t a) @@ -39,8 +39,8 @@ int8x16_t test_vdupq_n_s8(int8_t a) // CHECK-LABEL: @test_vdupq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]] // int16x8_t test_vdupq_n_s16(int16_t a) @@ -50,8 +50,8 @@ int16x8_t test_vdupq_n_s16(int16_t a) // CHECK-LABEL: @test_vdupq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]] // int32x4_t test_vdupq_n_s32(int32_t a) @@ -61,8 +61,8 @@ int32x4_t test_vdupq_n_s32(int32_t a) // CHECK-LABEL: @test_vdupq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]] // uint8x16_t test_vdupq_n_u8(uint8_t a) @@ -72,8 +72,8 @@ uint8x16_t test_vdupq_n_u8(uint8_t a) // CHECK-LABEL: @test_vdupq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]] // uint16x8_t test_vdupq_n_u16(uint16_t a) @@ -83,8 +83,8 @@ uint16x8_t test_vdupq_n_u16(uint16_t a) // CHECK-LABEL: @test_vdupq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]] // uint32x4_t test_vdupq_n_u32(uint32_t a) @@ -96,8 +96,8 @@ uint32x4_t test_vdupq_n_u32(uint32_t a) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -114,8 +114,8 @@ float16x8_t test_vdupq_m_n_f16(float16x8_t inactive, float16_t a, mve_pred16_t p // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -132,8 +132,8 @@ float32x4_t test_vdupq_m_n_f32(float32x4_t inactive, float32_t a, mve_pred16_t p // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]] // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -150,8 +150,8 @@ int8x16_t test_vdupq_m_n_s8(int8x16_t inactive, int8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -168,8 +168,8 @@ int16x8_t test_vdupq_m_n_s16(int16x8_t inactive, int16_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -186,8 +186,8 @@ int32x4_t test_vdupq_m_n_s32(int32x4_t inactive, int32_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]] // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -204,8 +204,8 @@ uint8x16_t test_vdupq_m_n_u8(uint8x16_t inactive, uint8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -222,8 +222,8 @@ uint16x8_t test_vdupq_m_n_u16(uint16x8_t inactive, uint16_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -240,8 +240,8 @@ uint32x4_t test_vdupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> undef // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -254,8 +254,8 @@ float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> undef // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -268,8 +268,8 @@ float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -282,8 +282,8 @@ int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -296,8 +296,8 @@ int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -310,8 +310,8 @@ int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -324,8 +324,8 @@ uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -338,8 +338,8 @@ uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c index fd771b9f325a..42408974a91b 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c @@ -32,8 +32,8 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { // CHECK-LABEL: @test_vfmaq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // @@ -47,8 +47,8 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { // CHECK-LABEL: @test_vfmaq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // @@ -62,8 +62,8 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { // CHECK-LABEL: @test_vfmasq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // @@ -77,8 +77,8 @@ float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { // CHECK-LABEL: @test_vfmasq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // @@ -120,8 +120,8 @@ float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { // CHECK-LABEL: @test_vmlaq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] @@ -136,8 +136,8 @@ int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { // CHECK-LABEL: @test_vmlaq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] @@ -152,8 +152,8 @@ int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { // CHECK-LABEL: @test_vmlaq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -168,8 +168,8 @@ int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { // CHECK-LABEL: @test_vmlaq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] @@ -184,8 +184,8 @@ uint8x16_t test_vmlaq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) { // CHECK-LABEL: @test_vmlaq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] @@ -200,8 +200,8 @@ uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { // CHECK-LABEL: @test_vmlaq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]] // CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -217,8 +217,8 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { // CHECK-LABEL: @test_vmlasq_n_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -233,8 +233,8 @@ int8x16_t test_vmlasq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { // CHECK-LABEL: @test_vmlasq_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -249,8 +249,8 @@ int16x8_t test_vmlasq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { // CHECK-LABEL: @test_vmlasq_n_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -265,8 +265,8 @@ int32x4_t test_vmlasq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { // CHECK-LABEL: @test_vmlasq_n_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -281,8 +281,8 @@ uint8x16_t test_vmlasq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) { // CHECK-LABEL: @test_vmlasq_n_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -297,8 +297,8 @@ uint16x8_t test_vmlasq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { // CHECK-LABEL: @test_vmlasq_n_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -506,8 +506,8 @@ float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr // CHECK-LABEL: @test_vfmaq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]]) @@ -523,8 +523,8 @@ float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pr // CHECK-LABEL: @test_vfmaq_m_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]], <4 x i1> [[TMP1]]) @@ -540,8 +540,8 @@ float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, mve_pr // CHECK-LABEL: @test_vfmasq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]]) @@ -557,8 +557,8 @@ float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_p // CHECK-LABEL: @test_vfmasq_m_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[C:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c index a4bb90f280a2..ca529457ed63 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c @@ -98,8 +98,8 @@ float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vaddq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -114,8 +114,8 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vaddq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <8 x half> [[TMP0]] // @@ -130,8 +130,8 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vaddq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -148,8 +148,8 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 // CHECK-LABEL: @test_vaddq_m_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) @@ -166,8 +166,8 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, // CHECK-LABEL: @test_vaddq_x_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) @@ -184,8 +184,8 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vaddq_x_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c index b3c34dee1503..3abf36bc37f2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c @@ -144,8 +144,8 @@ uint32x4_t test_vhaddq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) // CHECK-LABEL: @test_vhaddq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -160,8 +160,8 @@ uint8x16_t test_vhaddq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vhaddq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -176,8 +176,8 @@ int16x8_t test_vhaddq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vhaddq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -192,8 +192,8 @@ uint32x4_t test_vhaddq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vhaddq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -210,8 +210,8 @@ int8x16_t test_vhaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred // CHECK-LABEL: @test_vhaddq_m_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -228,8 +228,8 @@ uint16x8_t test_vhaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv // CHECK-LABEL: @test_vhaddq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) @@ -246,8 +246,8 @@ int32x4_t test_vhaddq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr // CHECK-LABEL: @test_vhaddq_x_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) @@ -264,8 +264,8 @@ uint8x16_t test_vhaddq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vhaddq_x_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) @@ -282,8 +282,8 @@ int16x8_t test_vhaddq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vhaddq_x_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c index 0b01a5f0b05b..04aabe3952a5 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c @@ -96,8 +96,8 @@ int32x4_t test_vhsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr // CHECK-LABEL: @test_vhsubq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhsub.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -112,8 +112,8 @@ uint8x16_t test_vhsubq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vhsubq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -128,8 +128,8 @@ int16x8_t test_vhsubq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vhsubq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -144,8 +144,8 @@ uint32x4_t test_vhsubq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vhsubq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -162,8 +162,8 @@ int8x16_t test_vhsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred // CHECK-LABEL: @test_vhsubq_m_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -180,8 +180,8 @@ uint16x8_t test_vhsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv // CHECK-LABEL: @test_vhsubq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) @@ -198,8 +198,8 @@ int32x4_t test_vhsubq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr // CHECK-LABEL: @test_vhsubq_x_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) @@ -216,8 +216,8 @@ uint8x16_t test_vhsubq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vhsubq_x_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) @@ -234,8 +234,8 @@ int16x8_t test_vhsubq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vhsubq_x_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c index 48f05f5a6895..214e9d95cacd 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c @@ -190,8 +190,8 @@ float32x4_t test_vmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmulq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -206,8 +206,8 @@ uint8x16_t test_vmulq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vmulq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -222,8 +222,8 @@ int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vmulq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -238,8 +238,8 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vmulq_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x float> [[TMP0]] // @@ -254,8 +254,8 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) // CHECK-LABEL: @test_vmulq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -272,8 +272,8 @@ int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 // CHECK-LABEL: @test_vmulq_m_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -290,8 +290,8 @@ uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve // CHECK-LABEL: @test_vmulq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) @@ -308,8 +308,8 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre // CHECK-LABEL: @test_vmulq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) @@ -326,8 +326,8 @@ float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, // CHECK-LABEL: @test_vmulq_x_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef) @@ -344,8 +344,8 @@ uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmulq_x_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) @@ -361,8 +361,8 @@ int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) } // CHECK-LABEL: @test_vmulq_x_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef) @@ -379,8 +379,8 @@ uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmulq_x_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c index 1b0d20ea8737..1d05e9dcefaf 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c @@ -96,8 +96,8 @@ int32x4_t test_vqaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr // CHECK-LABEL: @test_vqaddq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -112,8 +112,8 @@ uint8x16_t test_vqaddq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vqaddq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -128,8 +128,8 @@ int16x8_t test_vqaddq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vqaddq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -144,8 +144,8 @@ uint32x4_t test_vqaddq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vqaddq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -162,8 +162,8 @@ int8x16_t test_vqaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred // CHECK-LABEL: @test_vqaddq_m_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -180,8 +180,8 @@ uint16x8_t test_vqaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv // CHECK-LABEL: @test_vqaddq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c index b8a24e8d8f82..45a2b5eb7984 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c @@ -96,8 +96,8 @@ int32x4_t test_vqdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_ // CHECK-LABEL: @test_vqdmulhq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -112,8 +112,8 @@ int8x16_t test_vqdmulhq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vqdmulhq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -128,8 +128,8 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vqdmulhq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -144,8 +144,8 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vqdmulhq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -162,8 +162,8 @@ int8x16_t test_vqdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pr // CHECK-LABEL: @test_vqdmulhq_m_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -180,8 +180,8 @@ int16x8_t test_vqdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve_ // CHECK-LABEL: @test_vqdmulhq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c index b38d20b6c206..cf9ffdf13872 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c @@ -62,8 +62,8 @@ int64x2_t test_vqdmullbq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve // CHECK-LABEL: @test_vqdmullbq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -77,8 +77,8 @@ int32x4_t test_vqdmullbq_n_s16(int16x8_t a, int16_t b) { // CHECK-LABEL: @test_vqdmullbq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0) // CHECK-NEXT: ret <2 x i64> [[TMP0]] // @@ -92,8 +92,8 @@ int64x2_t test_vqdmullbq_n_s32(int32x4_t a, int32_t b) { // CHECK-LABEL: @test_vqdmullbq_m_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) @@ -109,8 +109,8 @@ int32x4_t test_vqdmullbq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve // CHECK-LABEL: @test_vqdmullbq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <2 x i64> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c index 095ec60f64a3..26b216c8dc17 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c @@ -62,8 +62,8 @@ int64x2_t test_vqdmulltq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve // CHECK-LABEL: @test_vqdmulltq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -77,8 +77,8 @@ int32x4_t test_vqdmulltq_n_s16(int16x8_t a, int16_t b) { // CHECK-LABEL: @test_vqdmulltq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1) // CHECK-NEXT: ret <2 x i64> [[TMP0]] // @@ -92,8 +92,8 @@ int64x2_t test_vqdmulltq_n_s32(int32x4_t a, int32_t b) { // CHECK-LABEL: @test_vqdmulltq_m_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) @@ -109,8 +109,8 @@ int32x4_t test_vqdmulltq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve // CHECK-LABEL: @test_vqdmulltq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <2 x i64> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c index b90a00102646..b62fe8de9c32 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c @@ -96,8 +96,8 @@ int32x4_t test_vqrdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve // CHECK-LABEL: @test_vqrdmulhq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -112,8 +112,8 @@ int8x16_t test_vqrdmulhq_n_s8(int8x16_t a, int8_t b) // CHECK-LABEL: @test_vqrdmulhq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -128,8 +128,8 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vqrdmulhq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -144,8 +144,8 @@ int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) // CHECK-LABEL: @test_vqrdmulhq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qrdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -162,8 +162,8 @@ int8x16_t test_vqrdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_p // CHECK-LABEL: @test_vqrdmulhq_m_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qrdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -180,8 +180,8 @@ int16x8_t test_vqrdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve // CHECK-LABEL: @test_vqrdmulhq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c index a50c1ba33b10..d6ac32b05b2c 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c @@ -96,8 +96,8 @@ int32x4_t test_vqsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr // CHECK-LABEL: @test_vqsubq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // @@ -112,8 +112,8 @@ uint8x16_t test_vqsubq_n_u8(uint8x16_t a, uint8_t b) // CHECK-LABEL: @test_vqsubq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // @@ -128,8 +128,8 @@ int16x8_t test_vqsubq_n_s16(int16x8_t a, int16_t b) // CHECK-LABEL: @test_vqsubq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -144,8 +144,8 @@ uint32x4_t test_vqsubq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vqsubq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -162,8 +162,8 @@ int8x16_t test_vqsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred // CHECK-LABEL: @test_vqsubq_m_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) @@ -180,8 +180,8 @@ uint16x8_t test_vqsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv // CHECK-LABEL: @test_vqsubq_m_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c index b73ae24ad9f6..bc904809a1e9 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c @@ -98,8 +98,8 @@ float16x8_t test_vsubq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vsubq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = sub <4 x i32> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <4 x i32> [[TMP0]] // @@ -114,8 +114,8 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b) // CHECK-LABEL: @test_vsubq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]] // CHECK-NEXT: ret <8 x half> [[TMP0]] // @@ -130,8 +130,8 @@ float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b) // CHECK-LABEL: @test_vsubq_m_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) @@ -148,8 +148,8 @@ int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 // CHECK-LABEL: @test_vsubq_m_n_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) @@ -166,8 +166,8 @@ float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, // CHECK-LABEL: @test_vsubq_x_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) @@ -184,8 +184,8 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vsubq_x_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index 427ac47d17e4..b0dda0bc29e9 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -1359,12 +1359,12 @@ vector float test_vec_vec_splati_f(void) { vector double test_vec_vec_splatid(void) { // CHECK-BE: [[T1:%.+]] = fpext float %{{.+}} to double - // CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> undef, double [[T1:%.+]], i32 0 - // CHECK-BE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> undef, <2 x i32> zeroinitialize + // CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0 + // CHECK-BE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize // CHECK-BE-NEXT: ret <2 x double> [[T3:%.+]] // CHECK-LE: [[T1:%.+]] = fpext float %{{.+}} to double - // CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> undef, double [[T1:%.+]], i32 0 - // CHECK-LE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> undef, <2 x i32> zeroinitialize + // CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0 + // CHECK-LE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize // CHECK-LE-NEXT: ret <2 x double> [[T3:%.+]] return vec_splatid(1.0); } diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c index f44ecd77337c..9a9cb67f6ec5 100644 --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -32,8 +32,8 @@ void add_matrix_scalar_double_float(dx5x5_t a, float vf) { // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR_EXT]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 @@ -44,8 +44,8 @@ void add_matrix_scalar_double_double(dx5x5_t a, double vd) { // CHECK-LABEL: define void @add_matrix_scalar_double_double(<25 x double> %a, double %vd) // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 @@ -56,8 +56,8 @@ void add_matrix_scalar_float_float(fx2x3_t b, float vf) { // CHECK-LABEL: define void @add_matrix_scalar_float_float(<6 x float> %b, float %vf) // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4 // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4 @@ -69,8 +69,8 @@ void add_matrix_scalar_float_double(fx2x3_t b, double vd) { // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4 // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR_TRUNC]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4 @@ -103,8 +103,8 @@ void add_matrix_scalar_int_short(ix9x3_t a, short vs) { // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 // CHECK-NEXT: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_EXT]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 @@ -116,8 +116,8 @@ void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 @@ -129,8 +129,8 @@ void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 @@ -142,8 +142,8 @@ void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR_EXT]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8 @@ -154,8 +154,8 @@ void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { // CHECK-LABEL: define void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli) // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8 // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8 @@ -166,8 +166,8 @@ void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned lo // CHECK-LABEL: define void @add_matrix_scalar_long_long_int_unsigned_long_long // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8 // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8 b = vulli + b; @@ -208,8 +208,8 @@ void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) { // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 // CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double -// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> undef, double [[S_EXT]], i32 0 -// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> undef, <25 x i32> zeroinitializer +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 // CHECK-NEXT: ret void @@ -221,8 +221,8 @@ void multiply_double_matrix_scalar_float(dx5x5_t a, float s) { // CHECK-LABEL: @multiply_double_matrix_scalar_double( // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 // CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8 -// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> undef, double [[S]], i32 0 -// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> undef, <25 x i32> zeroinitializer +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 // CHECK-NEXT: ret void @@ -235,8 +235,8 @@ void multiply_double_matrix_scalar_double(dx5x5_t a, double s) { // CHECK: [[S:%.*]] = load double, double* %s.addr, align 8 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4 -// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> undef, float [[S_TRUNC]], i32 0 -// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> undef, <6 x i32> zeroinitializer +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]] // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void @@ -249,8 +249,8 @@ void multiply_float_matrix_scalar_double(fx2x3_t b, double s) { // CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 -// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> undef, i32 [[S_EXT]], i32 0 -// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> undef, <27 x i32> zeroinitializer +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]] // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void @@ -263,8 +263,8 @@ void multiply_int_matrix_scalar_short(ix9x3_t b, short s) { // CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 // CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 -// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> undef, i32 [[S_TRUNC]], i32 0 -// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> undef, <27 x i32> zeroinitializer +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void diff --git a/clang/test/CodeGen/vecshift.c b/clang/test/CodeGen/vecshift.c index f37c5092875f..9884975c3c53 100644 --- a/clang/test/CodeGen/vecshift.c +++ b/clang/test/CodeGen/vecshift.c @@ -70,36 +70,36 @@ void foo() { vc8 = c << vc8; // CHECK: [[t6:%.+]] = load i8, i8* @c, -// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> undef, i8 [[t6]], i32 0 -// CHECK: [[splat_splat:%.+]] = shufflevector <8 x i8> [[splat_splatinsert]], <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> poison, i8 [[t6]], i32 0 +// CHECK: [[splat_splat:%.+]] = shufflevector <8 x i8> [[splat_splatinsert]], <8 x i8> poison, <8 x i32> zeroinitializer // CHECK: [[t7:%.+]] = load <8 x i8>, <8 x i8>* {{@.+}}, // CHECK: shl <8 x i8> [[splat_splat]], [[t7]] vuc8 = i << vuc8; // CHECK: [[t8:%.+]] = load i32, i32* @i, // CHECK: [[tconv:%.+]] = trunc i32 [[t8]] to i8 -// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> undef, i8 [[tconv]], i32 0 -// CHECK: [[splat_splat8:%.+]] = shufflevector <8 x i8> [[splat_splatinsert7]], <8 x i8> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> poison, i8 [[tconv]], i32 0 +// CHECK: [[splat_splat8:%.+]] = shufflevector <8 x i8> [[splat_splatinsert7]], <8 x i8> poison, <8 x i32> zeroinitializer // CHECK: [[t9:%.+]] = load <8 x i8>, <8 x i8>* {{@.+}}, // CHECK: shl <8 x i8> [[splat_splat8]], [[t9]] vi8 = uc << vi8; // CHECK: [[t10:%.+]] = load i8, i8* @uc, // CHECK: [[conv10:%.+]] = zext i8 [[t10]] to i32 -// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> undef, i32 [[conv10]], i32 0 -// CHECK: [[splat_splat12:%.+]] = shufflevector <8 x i32> [[splat_splatinsert11]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> poison, i32 [[conv10]], i32 0 +// CHECK: [[splat_splat12:%.+]] = shufflevector <8 x i32> [[splat_splatinsert11]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t11:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}}, // CHECK: shl <8 x i32> [[splat_splat12]], [[t11]] vui8 = us << vui8; // CHECK: [[t12:%.+]] = load i16, i16* @us, // CHECK: [[conv14:%.+]] = zext i16 [[t12]] to i32 -// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> undef, i32 [[conv14]], i32 0 -// CHECK: [[splat_splat16:%.+]] = shufflevector <8 x i32> [[splat_splatinsert15]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> poison, i32 [[conv14]], i32 0 +// CHECK: [[splat_splat16:%.+]] = shufflevector <8 x i32> [[splat_splatinsert15]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t13:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}}, // CHECK: shl <8 x i32> [[splat_splat16]], [[t13]] vs8 = ui << vs8; // CHECK: [[t14:%.+]] = load i32, i32* @ui, // CHECK: [[conv18:%.+]] = trunc i32 [[t14]] to i16 -// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> undef, i16 [[conv18]], i32 0 -// CHECK: [[splat_splat20:%.+]] = shufflevector <8 x i16> [[splat_splatinsert19]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> poison, i16 [[conv18]], i32 0 +// CHECK: [[splat_splat20:%.+]] = shufflevector <8 x i16> [[splat_splatinsert19]], <8 x i16> poison, <8 x i32> zeroinitializer // CHECK: [[t15:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}}, // CHECK: shl <8 x i16> [[splat_splat20]], [[t15]] vus8 = 1 << vus8; @@ -134,28 +134,28 @@ void foo() { vui8 <<= s; // CHECK: [[t27:%.+]] = load i16, i16* @s, // CHECK: [[conv40:%.+]] = sext i16 [[t27]] to i32 -// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> undef, i32 [[conv40]], i32 0 -// CHECK: [[splat_splat42:%.+]] = shufflevector <8 x i32> [[splat_splatinsert41]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> poison, i32 [[conv40]], i32 0 +// CHECK: [[splat_splat42:%.+]] = shufflevector <8 x i32> [[splat_splatinsert41]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t28:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}}, // CHECK: shl <8 x i32> [[t28]], [[splat_splat42]] vi8 <<= us; // CHECK: [[t29:%.+]] = load i16, i16* @us, // CHECK: [[conv44:%.+]] = zext i16 [[t29]] to i32 -// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> undef, i32 [[conv44]], i32 0 -// CHECK: [[splat_splat46:%.+]] = shufflevector <8 x i32> [[splat_splatinsert45]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> poison, i32 [[conv44]], i32 0 +// CHECK: [[splat_splat46:%.+]] = shufflevector <8 x i32> [[splat_splatinsert45]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t30:%.+]] = load <8 x i32>, <8 x i32>* {{@.+}}, // CHECK: shl <8 x i32> [[t30]], [[splat_splat46]] vus8 <<= i; // CHECK: [[t31:%.+]] = load i32, i32* @i, -// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> undef, i32 [[t31]], i32 0 -// CHECK: [[splat_splat49:%.+]] = shufflevector <8 x i32> [[splat_splatinsert48]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> poison, i32 [[t31]], i32 0 +// CHECK: [[splat_splat49:%.+]] = shufflevector <8 x i32> [[splat_splatinsert48]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t32:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}}, // CHECK: [[sh_prom50:%.+]] = trunc <8 x i32> [[splat_splat49]] to <8 x i16> // CHECK: shl <8 x i16> [[t32]], [[sh_prom50]] vs8 <<= ui; // CHECK: [[t33:%.+]] = load i32, i32* @ui, -// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> undef, i32 [[t33]], i32 0 -// CHECK: [[splat_splat53:%.+]] = shufflevector <8 x i32> [[splat_splatinsert52]], <8 x i32> undef, <8 x i32> zeroinitializer +// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> poison, i32 [[t33]], i32 0 +// CHECK: [[splat_splat53:%.+]] = shufflevector <8 x i32> [[splat_splatinsert52]], <8 x i32> poison, <8 x i32> zeroinitializer // CHECK: [[t34:%.+]] = load <8 x i16>, <8 x i16>* {{@.+}}, // CHECK: [[sh_prom54:%.+]] = trunc <8 x i32> [[splat_splat53]] to <8 x i16> // CHECK: shl <8 x i16> [[t34]], [[sh_prom54]] diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp index 568ea8bbcd35..4e825d0e92e0 100644 --- a/clang/test/CodeGenCXX/matrix-type-operators.cpp +++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp @@ -61,8 +61,8 @@ void test_DoubleWrapper1_Sub1(MyMatrix &m) { // CHECK-LABEL: define void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE( // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1) - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -75,8 +75,8 @@ void test_DoubleWrapper1_Sub2(MyMatrix &m) { // CHECK-LABEL: define void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE( // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1) // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -96,8 +96,8 @@ void test_DoubleWrapper2_Add1(MyMatrix &m) { // CHECK-LABEL: define void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE( // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2) - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -110,8 +110,8 @@ void test_DoubleWrapper2_Add2(MyMatrix &m) { // CHECK-LABEL: define void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE( // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2) // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -132,8 +132,8 @@ void test_IntWrapper_Add(MyMatrix &m) { // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8 // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3) // CHECK: [[SCALAR_FP:%.*]] = sitofp i32 %call to double - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -147,8 +147,8 @@ void test_IntWrapper_Sub(MyMatrix &m) { // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3) // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 @@ -188,8 +188,8 @@ void test_IntWrapper_Multiply(MyMatrix &m, IntWrapper &w3) { // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{.*}}) // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8 - // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> undef, double [[SCALAR_FP]], i32 0 - // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> undef, <90 x i32> zeroinitializer + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8 // CHECK: ret void diff --git a/clang/test/CodeGenCXX/vector-conditional.cpp b/clang/test/CodeGenCXX/vector-conditional.cpp index 117f93859a76..1a739bc56ed5 100644 --- a/clang/test/CodeGenCXX/vector-conditional.cpp +++ b/clang/test/CodeGenCXX/vector-conditional.cpp @@ -62,22 +62,22 @@ void TwoScalarOps() { four_shorts ? some_short : some_short; // CHECK: %[[COND:.+]] = load <4 x i16> // CHECK: %[[LHS:.+]] = load i16 - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[LHS]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load i16 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]] four_shorts ? some_ushort : some_ushort; // CHECK: %[[COND:.+]] = load <4 x i16> // CHECK: %[[LHS:.+]] = load i16 - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[LHS]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load i16 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> undef, i16 %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]] @@ -85,12 +85,12 @@ void TwoScalarOps() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load i16 // CHECK: %[[LHS_ZEXT:.+]] = zext i16 %[[LHS]] to i32 - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS_ZEXT]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_ZEXT]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load i16 // CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_SEXT]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]] @@ -98,35 +98,35 @@ void TwoScalarOps() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load i32 // CHECK: %[[LHS_CONV:.+]] = sitofp i32 %[[LHS]] to float - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[LHS_CONV]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x float> %[[LHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[LHS_CONV]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x float> %[[LHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load float - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS_SPLAT]], <4 x float> %[[RHS_SPLAT]] four_ll ? some_double : some_ll; // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load double - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> undef, double %[[LHS]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x double> %[[LHS_SPLAT_INSERT]], <4 x double> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[LHS]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x double> %[[LHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load i64 // CHECK: %[[RHS_CONV:.+]] = sitofp i64 %[[RHS]] to double - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> undef, double %[[RHS_CONV]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x double> %[[RHS_SPLAT_INSERT]], <4 x double> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[RHS_CONV]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x double> %[[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS_SPLAT]], <4 x double> %[[RHS_SPLAT]] four_ints ? some_int : some_short; // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load i32 - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load i16 // CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_SEXT]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]] } @@ -137,8 +137,8 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x i32> // CHECK: %[[RHS:.+]] = load i32 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> %[[RHS_SPLAT]] @@ -152,8 +152,8 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[RHS:.+]] = load float // CHECK: %[[RHS_CONV:.+]] = fptosi float %[[RHS]] to i32 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[RHS_CONV]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_CONV]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[COND]], <4 x i32> %[[RHS_SPLAT]] @@ -167,8 +167,8 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load float // CHECK: %[[LHS_CONV:.+]] = fptosi float %[[LHS]] to i32 - // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> undef, i32 %[[LHS_CONV]], i32 0 - // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> undef, <4 x i32> zeroinitializer + // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_CONV]], i32 0 + // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: %[[RHS:.+]] = load <4 x i32> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS]] @@ -177,8 +177,8 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x float> // CHECK: %[[RHS:.+]] = load float - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> undef, float %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS]], <4 x float> %[[RHS_SPLAT]] @@ -205,8 +205,8 @@ void OneScalarOp() { // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[RHS:.+]] = load i32 // CHECK: %[[RHS_CONV:.+]] = sext i32 %[[RHS]] to i64 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS_CONV]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]] @@ -214,8 +214,8 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[RHS:.+]] = load i64 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]] @@ -224,8 +224,8 @@ void OneScalarOp() { // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[RHS:.+]] = load double // CHECK: %[[RHS_CONV:.+]] = fptosi double %[[RHS]] to i64 - // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> undef, i64 %[[RHS_CONV]], i32 0 - // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> undef, <4 x i32> zeroinitializer + // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0 + // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]] } diff --git a/clang/test/CodeGenCXX/vector-splat-conversion.cpp b/clang/test/CodeGenCXX/vector-splat-conversion.cpp index 618ad76ce691..82cec3d9d54a 100644 --- a/clang/test/CodeGenCXX/vector-splat-conversion.cpp +++ b/clang/test/CodeGenCXX/vector-splat-conversion.cpp @@ -56,7 +56,7 @@ gcc_int_2 FloatToIntConversion(gcc_int_2 Int2, float f) { // CHECK: %[[LOAD_INT:.+]] = load <2 x i32> // CHECK: %[[LOAD:.+]] = load float, float* // CHECK: %[[CONV:.+]] = fptosi float %[[LOAD]] to i32 - // CHECK: %[[INSERT:.+]] = insertelement <2 x i32> undef, i32 %[[CONV]], i32 0 - // CHECK: %[[SPLAT:.+]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer + // CHECK: %[[INSERT:.+]] = insertelement <2 x i32> poison, i32 %[[CONV]], i32 0 + // CHECK: %[[SPLAT:.+]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer // CHECK: add <2 x i32> %[[LOAD_INT]], %[[SPLAT]] } diff --git a/clang/test/CodeGenOpenCL/bool_cast.cl b/clang/test/CodeGenOpenCL/bool_cast.cl index 72926eb141ab..395991cb8f13 100644 --- a/clang/test/CodeGenOpenCL/bool_cast.cl +++ b/clang/test/CodeGenOpenCL/bool_cast.cl @@ -11,8 +11,8 @@ void kernel ker() { // CHECK: {{%.*}} = load i8, i8* %t, align 1 // CHECK: {{%.*}} = trunc i8 {{%.*}} to i1 // CHECK: {{%.*}} = sext i1 {{%.*}} to i32 -// CHECK: {{%.*}} = insertelement <4 x i32> undef, i32 {{%.*}}, i32 0 -// CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK: {{%.*}} = insertelement <4 x i32> poison, i32 {{%.*}}, i32 0 +// CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: store <4 x i32> {{%.*}}, <4 x i32>* %vec4, align 16 int i = (int)t; // CHECK: {{%.*}} = load i8, i8* %t, align 1 diff --git a/clang/test/CodeGenOpenCL/shifts.cl b/clang/test/CodeGenOpenCL/shifts.cl index 7011a4234bb5..020efd777cfd 100644 --- a/clang/test/CodeGenOpenCL/shifts.cl +++ b/clang/test/CodeGenOpenCL/shifts.cl @@ -60,8 +60,8 @@ int4 vectorVectorTest(int4 a,int4 b) { //NOOPT-LABEL: @vectorScalarTest int4 vectorScalarTest(int4 a,int b) { - //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> undef - //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> undef, <4 x i32> zeroinitializer + //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> poison + //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> poison, <4 x i32> zeroinitializer //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]] int4 c = a << b; diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 6bc5e89453ad..4b26299d046c 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2524,10 +2524,10 @@ public: } /// Create a unary shuffle. The second vector operand of the IR instruction - /// is undefined. + /// is poison. Value *CreateShuffleVector(Value *V, ArrayRef Mask, const Twine &Name = "") { - return CreateShuffleVector(V, UndefValue::get(V->getType()), Mask, Name); + return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name); } Value *CreateExtractValue(Value *Agg, diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index f936f5756b6f..e8fa35314a94 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1009,15 +1009,16 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name) { assert(EC.isNonZero() && "Cannot splat to an empty vector!"); - // First insert it into an undef vector so we can shuffle it. + // First insert it into a poison vector so we can shuffle it. Type *I32Ty = getInt32Ty(); - Value *Undef = UndefValue::get(VectorType::get(V->getType(), EC)); - V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0), + Value *Poison = PoisonValue::get(VectorType::get(V->getType(), EC)); + V = CreateInsertElement(Poison, V, ConstantInt::get(I32Ty, 0), Name + ".splatinsert"); // Shuffle the value across the desired number of elements. - Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, EC)); - return CreateShuffleVector(V, Undef, Zeros, Name + ".splat"); + SmallVector Zeros; + Zeros.resize(EC.getKnownMinValue()); + return CreateShuffleVector(V, Zeros, Name + ".splat"); } Value *IRBuilderBase::CreateExtractInteger( diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll index 38a3a6f5c70b..fdfe6acbdb49 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll @@ -2790,8 +2790,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrai_w_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -2803,8 +2803,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrai_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -2817,8 +2817,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrai_q_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] ; @@ -2830,8 +2830,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrli_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -2844,8 +2844,8 @@ define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrli_q_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i64> [[TMP3]] ; @@ -2858,8 +2858,8 @@ define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrli_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -2872,8 +2872,8 @@ define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) { ; CHECK-LABEL: @sse2_pslli_q_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -2886,8 +2886,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx2_pslli_w_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -2899,8 +2899,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx512_pslli_d_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index dc9844fc0162..e9b7a9a17b20 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -2790,8 +2790,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrai_w_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -2803,8 +2803,8 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrai_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -2817,8 +2817,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrai_q_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] ; @@ -2830,8 +2830,8 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrli_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -2844,8 +2844,8 @@ define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrli_q_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i64> [[TMP3]] ; @@ -2858,8 +2858,8 @@ define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrli_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -2872,8 +2872,8 @@ define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) { ; CHECK-LABEL: @sse2_pslli_q_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -2886,8 +2886,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx2_pslli_w_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -2899,8 +2899,8 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx512_pslli_d_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/gep-inbounds-null.ll b/llvm/test/Transforms/InstCombine/gep-inbounds-null.ll index 08a65d0ca35a..788ff5dc6ec8 100644 --- a/llvm/test/Transforms/InstCombine/gep-inbounds-null.ll +++ b/llvm/test/Transforms/InstCombine/gep-inbounds-null.ll @@ -90,7 +90,7 @@ entry: define <2 x i1> @test_vector_index(i8* %base, <2 x i64> %idx) { ; CHECK-LABEL: @test_vector_index( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8*> undef, i8* [[BASE:%.*]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8*> poison, i8* [[BASE:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i8*> [[DOTSPLATINSERT]], zeroinitializer ; CHECK-NEXT: [[CND:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CND]] diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 048dd9cceedf..1e88fc1158d9 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -215,7 +215,7 @@ define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind { define <2 x i1> @test13_vector2(i64 %X, <2 x %S*> %P) nounwind { ; CHECK-LABEL: @test13_vector2( -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], ; CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> undef, <2 x i32> zeroinitializer @@ -230,7 +230,7 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x %S*> %P) nounwind { ; This is a test of icmp + shl nuw in disguise - 4611... is 0x3fff... define <2 x i1> @test13_vector3(i64 %X, <2 x %S*> %P) nounwind { ; CHECK-LABEL: @test13_vector3( -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], ; CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> undef, <2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll index f986d10e11a0..2a2c0efe036d 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll @@ -5,8 +5,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -20,8 +20,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> % define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] ; CHECK-NEXT: ret <3 x float> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll index bf9609623d89..d1530fb682d0 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll @@ -5,8 +5,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -20,8 +20,8 @@ define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> % define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] ; CHECK-NEXT: ret <3 x float> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll b/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll index a0604c9c836b..58997a25de32 100644 --- a/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll @@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) { ; Extracting an i32 that isn't aligned to any natural boundary. define i32 @type_pun_misaligned(<16 x i8> %in) { ; CHECK-LABEL: @type_pun_misaligned( -; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32> ; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0 ; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]] diff --git a/llvm/test/Transforms/InstCombine/type_pun.ll b/llvm/test/Transforms/InstCombine/type_pun.ll index 56d1ffcb5d31..99bfcf9010db 100644 --- a/llvm/test/Transforms/InstCombine/type_pun.ll +++ b/llvm/test/Transforms/InstCombine/type_pun.ll @@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) { ; Extracting an i32 that isn't aligned to any natural boundary. define i32 @type_pun_misaligned(<16 x i8> %in) { ; CHECK-LABEL: @type_pun_misaligned( -; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32> ; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0 ; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]] diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll index 3a2509722114..2369c580d416 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll @@ -1473,7 +1473,7 @@ define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1486,7 +1486,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { define @vsplat_assoc_add( %x, %y) { ; CHECK-LABEL: @vsplat_assoc_add( ; CHECK-NEXT: [[TMP1:%.*]] = add [[X:%.*]], shufflevector ( insertelement ( undef, i32 317426, i32 0), undef, zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], undef, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret [[R]] ; @@ -1502,7 +1502,7 @@ define @vsplat_assoc_add( %x, @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elts( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1517,7 +1517,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1597,7 +1597,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fmul( ; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1612,7 +1612,7 @@ define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) { define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; @@ -1626,7 +1626,7 @@ define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt1( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; @@ -1655,7 +1655,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> % define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 4443c3628b08..9cee1f3ad73e 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1472,7 +1472,7 @@ define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1485,7 +1485,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { define @vsplat_assoc_add( %x, %y) { ; CHECK-LABEL: @vsplat_assoc_add( ; CHECK-NEXT: [[TMP1:%.*]] = add [[X:%.*]], shufflevector ( insertelement ( undef, i32 317426, i32 0), undef, zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], undef, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret [[R]] ; @@ -1501,7 +1501,7 @@ define @vsplat_assoc_add( %x, @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elts( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1516,7 +1516,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -1596,7 +1596,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fmul( ; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1611,7 +1611,7 @@ define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) { define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; @@ -1625,7 +1625,7 @@ define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt1( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; @@ -1654,7 +1654,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> % define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1( ; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vscale_cmp.ll b/llvm/test/Transforms/InstCombine/vscale_cmp.ll index e7b8a2e3e3f2..ebc4eccceded 100644 --- a/llvm/test/Transforms/InstCombine/vscale_cmp.ll +++ b/llvm/test/Transforms/InstCombine/vscale_cmp.ll @@ -12,10 +12,10 @@ define @sge( %x) { define @gep_scalevector1(i32* %X) nounwind { ; CHECK-LABEL: @gep_scalevector1( -; CHECK-NEXT: [[S:%.*]] = insertelement undef, i32* [[X:%.*]], i32 0 -; CHECK-NEXT: [[C:%.*]] = icmp eq [[S]], zeroinitializer -; CHECK-NEXT: [[C1:%.*]] = shufflevector [[C]], undef, zeroinitializer -; CHECK-NEXT: ret [[C1]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32* [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq [[DOTSPLATINSERT]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = shufflevector [[TMP1]], undef, zeroinitializer +; CHECK-NEXT: ret [[C]] ; %A = getelementptr inbounds i32, i32* %X, zeroinitializer %C = icmp eq %A, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll index 8aa196b4365e..3952594cb46f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll @@ -104,10 +104,10 @@ for.end: ; preds = %for.body ; CHECK-LABEL: @ptr_ind_plus2( ; CHECK: %[[V0:.*]] = load <8 x i32> ; CHECK: %[[V1:.*]] = load <8 x i32> -; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> poison, <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: add <4 x i32> @@ -117,8 +117,8 @@ for.end: ; preds = %for.body ; FORCE-VEC-LABEL: @ptr_ind_plus2( ; FORCE-VEC: %[[V:.*]] = load <4 x i32> -; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> -; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> +; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> poison, <2 x i32> +; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> poison, <2 x i32> ; FORCE-VEC: mul nsw <2 x i32> ; FORCE-VEC: add <2 x i32> ; FORCE-VEC: %index.next = add i64 %index, 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index aa8478b0b6aa..7cc41d0c6b67 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -18,8 +18,8 @@ ; CHECK-LABEL: @foo_i32( ; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] @@ -85,8 +85,8 @@ for.end10: ; preds = %for.inc8 ; CHECK-LABEL: @foo_i64( ; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> undef, i64 %n, i32 0 -; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> poison, i64 %n, i32 0 +; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index bf50e2d2e298..fb71bb426f62 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -12,8 +12,8 @@ define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -101,7 +101,7 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 -; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> , [[REVERSE]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP8]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 @@ -170,7 +170,7 @@ define void @test_stride2_4i32(i32* readonly %data, i32* noalias nocapture %dst, ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> , [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 @@ -369,8 +369,8 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]]) @@ -554,13 +554,13 @@ define void @test_stride_noninvar3_4i32(i32* readonly %data, i32* noalias nocapt ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[N_VEC]], [[X:%.*]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[X]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> , [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index 1e3d64437cb7..b7ffdc30a5d9 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -19,8 +19,8 @@ define i32 @mla_i32(i8* noalias nocapture readonly %A, i8* noalias nocapture rea ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -107,8 +107,8 @@ define i32 @mla_i8(i8* noalias nocapture readonly %A, i8* noalias nocapture read ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -195,8 +195,8 @@ define i32 @add_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -266,8 +266,8 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -337,8 +337,8 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -408,8 +408,8 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -479,8 +479,8 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -550,8 +550,8 @@ define float @fadd_f32(float* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) @@ -621,8 +621,8 @@ define float @fmul_f32(float* nocapture readonly %x, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index 6c6cfa9fadc3..730e26af0bb2 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -7,8 +7,8 @@ target triple = "thumbv8.1m.main-none-none-eabi" define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %s, i32%y) { ; CHECK-LABEL: @pointer_phi_v4i32_add1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -48,8 +48,8 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 1992 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -58,7 +58,7 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <8 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 @@ -103,8 +103,8 @@ define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 2988 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -156,8 +156,8 @@ define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i ; CHECK-LABEL: @pointer_phi_v8i16_add1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -199,8 +199,8 @@ define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1984 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 992 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -209,7 +209,7 @@ define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, <16 x i16>* [[TMP2]], align 2 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> undef, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>* ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 @@ -295,8 +295,8 @@ define hidden void @pointer_phi_v16i8_add1(i8* noalias nocapture readonly %A, i8 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 992 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -351,8 +351,8 @@ define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 1984 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -361,7 +361,7 @@ define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP2]], align 1 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <16 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* [[TMP4]], align 1 @@ -446,8 +446,8 @@ end: define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { ; CHECK-LABEL: @pointer_phi_v4f32_add1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -487,8 +487,8 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A, ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 1992 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr float, float* [[B:%.*]], i32 996 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -497,7 +497,7 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A, ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[NEXT_GEP]] to <8 x float>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 @@ -542,8 +542,8 @@ define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A, ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 2988 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr float, float* [[B:%.*]], i32 996 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -594,8 +594,8 @@ end: define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) { ; CHECK-LABEL: @pointer_phi_v4half_add1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -635,8 +635,8 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A, ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 1984 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr half, half* [[B:%.*]], i32 992 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -645,7 +645,7 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A, ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <16 x half>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x half>, <16 x half>* [[TMP1]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> undef, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>* ; CHECK-NEXT: store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4 @@ -730,10 +730,10 @@ define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59952 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9992 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -798,14 +798,14 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59904 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9984 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[Y]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index 9228c6f7fdca..b67475f19fd1 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -300,7 +300,7 @@ define void @strides_different_direction(i32* noalias nocapture %A, i32* noalias ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 -3 ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4 -; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP7]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 82f73e2b7c4d..8d856bc610e6 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -13,11 +13,11 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: [[BB3:%.*]] = bitcast float* [[BB:%.*]] to i8* ; VF-TWO-CHECK-NEXT: [[CC6:%.*]] = bitcast float* [[CC:%.*]] to i8* ; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] ; VF-TWO-CHECK: iter.check: ; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2 -; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; VF-TWO-CHECK: vector.memcheck: ; VF-TWO-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[AA]], i64 [[WIDE_TRIP_COUNT]] ; VF-TWO-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* @@ -33,7 +33,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] ; VF-TWO-CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] ; VF-TWO-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true -; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vector.main.loop.iter.check: ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK12:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK12]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -69,40 +69,40 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]] ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 ; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8 ; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12 ; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16 ; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20 ; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24 ; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28 ; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32 ; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36 ; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40 ; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44 ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4, !alias.scope !0 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP0]] ; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]] ; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]] @@ -117,40 +117,40 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]] ; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4 ; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8 ; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12 ; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16 ; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20 ; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24 ; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28 ; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32 ; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36 ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40 ; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44 ; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4, !alias.scope !3 ; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD24]] ; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD13]], [[WIDE_LOAD25]] ; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD14]], [[WIDE_LOAD26]] @@ -177,57 +177,57 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]] ; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4 ; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8 ; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12 ; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16 ; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20 ; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24 ; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28 ; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32 ; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36 ; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40 ; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44 ; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>* -; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4 +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4, !alias.scope !5, !noalias !7 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 48 ; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV:!.*]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; VF-TWO-CHECK: middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vec.epilog.iter.check: ; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 -; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; VF-TWO-CHECK: vec.epilog.ph: ; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: [[N_MOD_VF36:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 ; VF-TWO-CHECK-NEXT: [[N_VEC37:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF36]] -; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_BODY:%.*]] +; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; VF-TWO-CHECK: vec.epilog.vector.body: -; VF-TWO-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_BODY]] ] +; VF-TWO-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX38]], 0 ; VF-TWO-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]] ; VF-TWO-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0 @@ -244,15 +244,15 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP140]], <2 x float>* [[TMP143]], align 4 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT39]] = add i64 [[INDEX38]], 2 ; VF-TWO-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC37]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_BODY]], !llvm.loop [[LOOPID_EV:!.*]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC37]] -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; VF-TWO-CHECK: scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK: vec.epilog.scalar.ph: +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: -; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]] ; VF-TWO-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]] @@ -262,7 +262,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS:!.*]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP12:!llvm.loop !.*]] ; VF-TWO-CHECK: for.end.loopexit.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; VF-TWO-CHECK: for.end.loopexit: @@ -270,10 +270,269 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK: for.end: ; VF-TWO-CHECK-NEXT: ret void ; -; VF-TWO-CHECK-DAG: [[LOOPID_MV]] = distinct !{[[LOOPID_MV]], [[LOOPID_DISABLE_VECT:!.*]]} -; VF-TWO-CHECK-DAG: [[LOOPID_EV]] = distinct !{[[LOOPID_EV]], [[LOOPID_DISABLE_UNROLL:!.*]], [[LOOPID_DISABLE_VECT:!.*]]} -; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_VECT]] = [[DISABLE_VECT_STR:!{!"llvm.loop.isvectorized".*}.*]] -; VF-TWO-CHECK-DAG: [[LOOPID_DISABLE_UNROLL]] = [[DISABLE_UNROLL_STR:!{!"llvm.loop.unroll.runtime.disable"}.*]] +; VF-FOUR-CHECK-LABEL: @f1( +; VF-FOUR-CHECK-NEXT: entry: +; VF-FOUR-CHECK-NEXT: [[AA1:%.*]] = bitcast float* [[AA:%.*]] to i8* +; VF-FOUR-CHECK-NEXT: [[BB3:%.*]] = bitcast float* [[BB:%.*]] to i8* +; VF-FOUR-CHECK-NEXT: [[CC6:%.*]] = bitcast float* [[CC:%.*]] to i8* +; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] +; VF-FOUR-CHECK: iter.check: +; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; VF-FOUR-CHECK: vector.memcheck: +; VF-FOUR-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[AA]], i64 [[WIDE_TRIP_COUNT]] +; VF-FOUR-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* +; VF-FOUR-CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[BB]], i64 [[WIDE_TRIP_COUNT]] +; VF-FOUR-CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* +; VF-FOUR-CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr float, float* [[CC]], i64 [[WIDE_TRIP_COUNT]] +; VF-FOUR-CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8* +; VF-FOUR-CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[AA1]], [[SCEVGEP45]] +; VF-FOUR-CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[BB3]], [[SCEVGEP2]] +; VF-FOUR-CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; VF-FOUR-CHECK-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[AA1]], [[SCEVGEP78]] +; VF-FOUR-CHECK-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[CC6]], [[SCEVGEP2]] +; VF-FOUR-CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] +; VF-FOUR-CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] +; VF-FOUR-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true +; VF-FOUR-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-FOUR-CHECK: vector.main.loop.iter.check: +; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK12:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48 +; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK12]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF-FOUR-CHECK: vector.ph: +; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 48 +; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; VF-FOUR-CHECK: vector.body: +; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 16 +; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 20 +; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 24 +; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 28 +; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 32 +; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 36 +; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 40 +; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 44 +; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP0]] +; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP1]] +; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP2]] +; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP3]] +; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP4]] +; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP5]] +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP6]] +; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP7]] +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP8]] +; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP9]] +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP10]] +; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]] +; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8 +; VF-FOUR-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12 +; VF-FOUR-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16 +; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20 +; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24 +; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32 +; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36 +; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40 +; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44 +; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4, !alias.scope !0 +; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP0]] +; VF-FOUR-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]] +; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]] +; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP3]] +; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP4]] +; VF-FOUR-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP5]] +; VF-FOUR-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP6]] +; VF-FOUR-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP7]] +; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP8]] +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP9]] +; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP10]] +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]] +; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8 +; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12 +; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16 +; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20 +; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24 +; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32 +; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36 +; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40 +; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44 +; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4, !alias.scope !3 +; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD24]] +; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD13]], [[WIDE_LOAD25]] +; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD14]], [[WIDE_LOAD26]] +; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[WIDE_LOAD15]], [[WIDE_LOAD27]] +; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[WIDE_LOAD16]], [[WIDE_LOAD28]] +; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[WIDE_LOAD17]], [[WIDE_LOAD29]] +; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[WIDE_LOAD18]], [[WIDE_LOAD30]] +; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[WIDE_LOAD19]], [[WIDE_LOAD31]] +; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[WIDE_LOAD20]], [[WIDE_LOAD32]] +; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[WIDE_LOAD21]], [[WIDE_LOAD33]] +; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[WIDE_LOAD22]], [[WIDE_LOAD34]] +; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[WIDE_LOAD23]], [[WIDE_LOAD35]] +; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP0]] +; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP1]] +; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP2]] +; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP3]] +; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP4]] +; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP5]] +; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP6]] +; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP7]] +; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP8]] +; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP9]] +; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP10]] +; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]] +; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4 +; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8 +; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12 +; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16 +; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20 +; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24 +; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32 +; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36 +; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40 +; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44 +; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4, !alias.scope !5, !noalias !7 +; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 48 +; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; VF-FOUR-CHECK: middle.block: +; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; VF-FOUR-CHECK: vec.epilog.iter.check: +; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; VF-FOUR-CHECK: vec.epilog.ph: +; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: [[N_MOD_VF36:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; VF-FOUR-CHECK-NEXT: [[N_VEC37:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF36]] +; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; VF-FOUR-CHECK: vec.epilog.vector.body: +; VF-FOUR-CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; VF-FOUR-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX38]], 0 +; VF-FOUR-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]] +; VF-FOUR-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP136:%.*]] = bitcast float* [[TMP135]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD41:%.*]] = load <4 x float>, <4 x float>* [[TMP136]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP133]] +; VF-FOUR-CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds float, float* [[TMP137]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP139:%.*]] = bitcast float* [[TMP138]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD42:%.*]] = load <4 x float>, <4 x float>* [[TMP139]], align 4 +; VF-FOUR-CHECK-NEXT: [[TMP140:%.*]] = fadd fast <4 x float> [[WIDE_LOAD41]], [[WIDE_LOAD42]] +; VF-FOUR-CHECK-NEXT: [[TMP141:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP133]] +; VF-FOUR-CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, float* [[TMP141]], i32 0 +; VF-FOUR-CHECK-NEXT: [[TMP143:%.*]] = bitcast float* [[TMP142]] to <4 x float>* +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP140]], <4 x float>* [[TMP143]], align 4 +; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT39]] = add i64 [[INDEX38]], 4 +; VF-FOUR-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC37]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; VF-FOUR-CHECK: vec.epilog.middle.block: +; VF-FOUR-CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC37]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-FOUR-CHECK: vec.epilog.scalar.ph: +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC37]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] +; VF-FOUR-CHECK: for.body: +; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]] +; VF-FOUR-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; VF-FOUR-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]] +; VF-FOUR-CHECK-NEXT: [[TMP146:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; VF-FOUR-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP145]], [[TMP146]] +; VF-FOUR-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[INDVARS_IV]] +; VF-FOUR-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 +; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP12:!llvm.loop !.*]] +; VF-FOUR-CHECK: for.end.loopexit.loopexit: +; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-FOUR-CHECK: for.end.loopexit: +; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] +; VF-FOUR-CHECK: for.end: +; VF-FOUR-CHECK-NEXT: ret void +; entry: @@ -305,6 +564,254 @@ for.end: ; preds = %for.end.loopexit, % } define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signext %n) #0 { +; VF-TWO-CHECK-LABEL: @f2( +; VF-TWO-CHECK-NEXT: entry: +; VF-TWO-CHECK-NEXT: [[A1:%.*]] = bitcast float* [[A:%.*]] to i8* +; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1 +; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] +; VF-TWO-CHECK: iter.check: +; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 +; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2 +; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; VF-TWO-CHECK: vector.scevcheck: +; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 +; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) +; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 +; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 +; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]] +; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] +; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] +; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] +; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] +; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 +; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] +; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; VF-TWO-CHECK: vector.memcheck: +; VF-TWO-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 [[WIDE_TRIP_COUNT]] +; VF-TWO-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* +; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP0]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 1 +; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP13]], [[WIDE_TRIP_COUNT]] +; VF-TWO-CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP14]] +; VF-TWO-CHECK-NEXT: [[SCEVGEP34:%.*]] = bitcast float* [[SCEVGEP3]] to i8* +; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP12]], 1 +; VF-TWO-CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr float, float* [[B]], i64 [[TMP15]] +; VF-TWO-CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast float* [[SCEVGEP5]] to i8* +; VF-TWO-CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP56]] +; VF-TWO-CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] +; VF-TWO-CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; VF-TWO-CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; VF-TWO-CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-TWO-CHECK: vector.main.loop.iter.check: +; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 +; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF-TWO-CHECK: vector.ph: +; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 +; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; VF-TWO-CHECK: vector.body: +; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 4 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 8 +; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 12 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 16 +; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 20 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 24 +; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 28 +; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 +; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 0 +; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 4 +; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 8 +; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 12 +; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], 16 +; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 20 +; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 24 +; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = add i32 [[OFFSET_IDX]], 28 +; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1 +; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1 +; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1 +; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1 +; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = xor i32 [[TMP28]], -1 +; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = xor i32 [[TMP29]], -1 +; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = xor i32 [[TMP30]], -1 +; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP31]], -1 +; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP36]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP37]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP39]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = sext i32 [[TMP44]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP53:%.*]] = sext i32 [[TMP45]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP54:%.*]] = sext i32 [[TMP46]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP55:%.*]] = sext i32 [[TMP47]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]] +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]] +; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]] +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]] +; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP52]] +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP53]] +; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP54]] +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP55]] +; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP64]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = bitcast float* [[TMP65]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -4 +; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP67]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -8 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP70]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = bitcast float* [[TMP71]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -12 +; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP73]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -16 +; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP76]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = bitcast float* [[TMP77]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -20 +; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP79]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -24 +; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds float, float* [[TMP82]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = bitcast float* [[TMP83]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -28 +; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds float, float* [[TMP85]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = bitcast float* [[TMP86]] to <4 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4, !alias.scope !13 +; VF-TWO-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> poison, <4 x i32> +; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE]], +; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE9]], +; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE11]], +; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE13]], +; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[REVERSE17]], +; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[REVERSE19]], +; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[REVERSE21]], +; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] +; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]] +; VF-TWO-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]] +; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]] +; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]] +; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] +; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP22]] +; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]] +; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4 +; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8 +; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12 +; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16 +; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20 +; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24 +; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>* +; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4, !alias.scope !16, !noalias !13 +; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 +; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] +; VF-TWO-CHECK: middle.block: +; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; VF-TWO-CHECK: vec.epilog.iter.check: +; VF-TWO-CHECK-NEXT: [[IND_END27:%.*]] = trunc i64 [[N_VEC]] to i32 +; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 +; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; VF-TWO-CHECK: vec.epilog.ph: +; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[N_MOD_VF22:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 +; VF-TWO-CHECK-NEXT: [[N_VEC23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF22]] +; VF-TWO-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC23]] to i32 +; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; VF-TWO-CHECK: vec.epilog.vector.body: +; VF-TWO-CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = add i64 [[INDEX24]], 0 +; VF-TWO-CHECK-NEXT: [[OFFSET_IDX29:%.*]] = trunc i64 [[INDEX24]] to i32 +; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = add i32 [[OFFSET_IDX29]], 0 +; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = xor i32 [[TMP122]], -1 +; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], [[N]] +; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = sext i32 [[TMP124]] to i64 +; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP125]] +; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[TMP126]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 -1 +; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <2 x float>* +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x float>, <2 x float>* [[TMP129]], align 4 +; VF-TWO-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <2 x float> [[WIDE_LOAD30]], <2 x float> poison, <2 x i32> +; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = fadd fast <2 x float> [[REVERSE31]], +; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP121]] +; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, float* [[TMP131]], i32 0 +; VF-TWO-CHECK-NEXT: [[TMP133:%.*]] = bitcast float* [[TMP132]] to <2 x float>* +; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP130]], <2 x float>* [[TMP133]], align 4 +; VF-TWO-CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 2 +; VF-TWO-CHECK-NEXT: [[TMP134:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP19:!llvm.loop !.*]] +; VF-TWO-CHECK: vec.epilog.middle.block: +; VF-TWO-CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC23]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N28]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK: vec.epilog.scalar.ph: +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] +; VF-TWO-CHECK: for.body: +; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL26]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[TMP135:%.*]] = xor i32 [[I_014]], -1 +; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP135]], [[N]] +; VF-TWO-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 +; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]] +; VF-TWO-CHECK-NEXT: [[TMP136:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP136]], 1.000000e+00 +; VF-TWO-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; VF-TWO-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4 +; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 +; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP20:!llvm.loop !.*]] +; VF-TWO-CHECK: for.end.loopexit.loopexit: +; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK: for.end.loopexit: +; VF-TWO-CHECK-NEXT: br label [[FOR_END]] +; VF-TWO-CHECK: for.end: +; VF-TWO-CHECK-NEXT: ret i32 0 +; ; VF-FOUR-CHECK-LABEL: @f2( ; VF-FOUR-CHECK-NEXT: entry: ; VF-FOUR-CHECK-NEXT: [[A1:%.*]] = bitcast float* [[A:%.*]] to i8* @@ -330,7 +837,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEM_CHECK:%.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; VF-FOUR-CHECK: vector.memcheck: ; VF-FOUR-CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 [[WIDE_TRIP_COUNT]] ; VF-FOUR-CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* @@ -408,43 +915,43 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 0 ; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, float* [[TMP64]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = bitcast float* [[TMP65]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP66]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -4 ; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP67]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -8 ; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, float* [[TMP70]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = bitcast float* [[TMP71]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP72]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -12 ; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP73]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -16 ; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, float* [[TMP76]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = bitcast float* [[TMP77]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP78]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -20 ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP79]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -24 ; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds float, float* [[TMP82]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = bitcast float* [[TMP83]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP84]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x float> [[WIDE_LOAD18]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds float, float* [[TMP56]], i32 -28 ; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds float, float* [[TMP85]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = bitcast float* [[TMP86]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP87]], align 4, !alias.scope !13 +; VF-FOUR-CHECK-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x float> [[WIDE_LOAD20]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE]], ; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE9]], ; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE11]], @@ -463,31 +970,31 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]] ; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0 ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP105]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4 ; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP107]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8 ; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP109]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12 ; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP111]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16 ; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP113]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20 ; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP115]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24 ; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP117]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28 ; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>* -; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4 +; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP119]], align 4, !alias.scope !16, !noalias !13 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 ; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOPID_MV_CM:!.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] ; VF-FOUR-CHECK: middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -515,7 +1022,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>* ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP129]], align 4 -; VF-FOUR-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x float> [[WIDE_LOAD30]], <4 x float> undef, <4 x i32> +; VF-FOUR-CHECK-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x float> [[WIDE_LOAD30]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = fadd fast <4 x float> [[REVERSE31]], ; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP121]] ; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, float* [[TMP131]], i32 0 @@ -523,13 +1030,13 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP130]], <4 x float>* [[TMP133]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 4 ; VF-FOUR-CHECK-NEXT: [[TMP134:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV_CM:!.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP134]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP19:!llvm.loop !.*]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC23]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N28]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: -; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEM_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEM_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-FOUR-CHECK: for.body: ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -545,18 +1052,15 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:!.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP20:!llvm.loop !.*]] ; VF-FOUR-CHECK: for.end.loopexit.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; VF-FOUR-CHECK: for.end.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] ; VF-FOUR-CHECK: for.end: ; VF-FOUR-CHECK-NEXT: ret i32 0 +; -; VF-FOUR-CHECK-DAG: [[LOOPID_MV_CM]] = distinct !{[[LOOPID_MV_CM]], [[LOOPID_DISABLE_VECT_CM:!.*]]} -; VF-FOUR-CHECK-DAG: [[LOOPID_EV_CM]] = distinct !{[[LOOPID_EV_CM]], [[LOOPID_DISABLE_UNROLL_CM:!.*]], [[LOOPID_DISABLE_VECT_CM:!.*]]} -; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_VECT_CM]] = [[DISABLE_VECT_STR_CM:!{!"llvm.loop.isvectorized".*}.*]] -; VF-FOUR-CHECK-DAG: [[LOOPID_DISABLE_UNROLL_CM]] = [[DISABLE_UNROLL_STR_CM:!{!"llvm.loop.unroll.runtime.disable"}.*]] entry: %cmp1 = icmp sgt i32 %n, 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index f3bacdac5e97..7c63a912e3ee 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -20,8 +20,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5 ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> undef, float %x, i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] @@ -30,13 +30,13 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <80 x float>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <80 x float>, <80 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> undef, <16 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <16 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> [[VEC_IND]] ; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x float*> [[TMP3]] to <16 x <80 x float>*> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x <80 x float>*> [[BC]], i32 0 ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]] ; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 8ddb4b4fa8cf..3161a09d3dd2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -19,8 +19,8 @@ define void @f1() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64 @@ -30,7 +30,7 @@ define void @f1() { ; CHECK-NEXT: store <2 x i16*> , <2 x i16*>* [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2, 2 ; CHECK-NEXT: br i1 [[CMP_N]], label [[BB3:%.*]], label [[SCALAR_PH]] @@ -44,10 +44,10 @@ define void @f1() { ; CHECK-NEXT: [[_TMP4:%.*]] = bitcast %rec8* [[_TMP2]] to i16* ; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64 ; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[_TMP6]] -; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]] +; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]], align 8 ; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1 ; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2 -; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll index 8363af8b2206..1b9c8c514691 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -18,10 +18,10 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK: if.then: ; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> undef, i8 [[X]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT2]], <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT2]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -36,11 +36,11 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> undef, i8 [[TMP8]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> undef, i8 [[TMP9]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll index bd887c072deb..eb016be067e9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll @@ -37,8 +37,8 @@ define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) ; AVX-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; AVX-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <8 x i32>* ; AVX-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 -; AVX-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> -; AVX-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> +; AVX-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> +; AVX-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] ; AVX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 445db6aea08c..47456e187eeb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -28,10 +28,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -57,10 +57,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX9:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1 ; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX9]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 0a1d40cc53eb..61d731b8a8bf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -126,12 +126,12 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -157,12 +157,12 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX10:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1 ; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX10]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -260,12 +260,12 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK16]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -294,12 +294,12 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX21:%.*]] = select i1 [[TMP9]], i64 [[N]], i64 1 ; CHECK-NEXT: [[N_VEC23:%.*]] = and i64 [[SMAX21]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 167285707e02..0ed09dad3728 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -22,14 +22,14 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: call void @init(i32* [[BASE]]) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[LEN:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> undef, i64 [[LEN]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 0858ce818b1b..7339a564568a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1683,22 +1683,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3 ; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41 -; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3 ; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* ; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41 -; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD12]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD12]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8 ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3 ; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* ; AVX2-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41 -; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD14]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD14]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3 ; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* ; AVX2-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41 -; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD16]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD16]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE13]], zeroinitializer ; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE15]], zeroinitializer @@ -1709,28 +1709,28 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]] ; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0 ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3 -; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE18]], <4 x double> undef), !alias.scope !44 -; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3 -; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> undef), !alias.scope !44 -; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8 ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3 -; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>* ; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44 -; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3 -; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>* ; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44 -; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE19]], ; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE22]], ; AVX2-NEXT: [[TMP42:%.*]] = fadd <4 x double> [[REVERSE25]], @@ -1739,22 +1739,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX2-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]] ; AVX2-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]] -; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX2-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3 ; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE29]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE18]]), !alias.scope !46, !noalias !48 -; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4 ; AVX2-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3 ; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE31]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE20]]), !alias.scope !46, !noalias !48 -; AVX2-NEXT: [[REVERSE33:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE33:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8 ; AVX2-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3 ; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE33]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48 -; AVX2-NEXT: [[REVERSE35:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32> +; AVX2-NEXT: [[REVERSE35:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12 ; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3 ; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>* @@ -1827,22 +1827,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7 ; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>* ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !55 -; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8 ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7 ; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !55 -; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD12]], <8 x i32> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD12]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16 ; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7 ; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>* ; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !55 -; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD14]], <8 x i32> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD14]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24 ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7 ; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>* ; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !55 -; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD16]], <8 x i32> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD16]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer ; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE13]], zeroinitializer ; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <8 x i32> [[REVERSE15]], zeroinitializer @@ -1853,28 +1853,28 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]] ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0 ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7 -; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>* ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE18]], <8 x double> undef), !alias.scope !58 -; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8 ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7 -; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>* ; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> undef), !alias.scope !58 -; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16 ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7 -; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>* ; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !58 -; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7 -; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>* ; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !58 -; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE19]], ; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE22]], ; AVX512-NEXT: [[TMP42:%.*]] = fadd <8 x double> [[REVERSE25]], @@ -1883,22 +1883,22 @@ define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %s ; AVX512-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]] ; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]] -; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -7 ; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE29]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE18]]), !alias.scope !60, !noalias !62 -; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8 ; AVX512-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -7 ; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE31]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE20]]), !alias.scope !60, !noalias !62 -; AVX512-NEXT: [[REVERSE33:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE33:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -16 ; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -7 ; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE33]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !60, !noalias !62 -; AVX512-NEXT: [[REVERSE35:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> undef, <8 x i32> +; AVX512-NEXT: [[REVERSE35:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -24 ; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -7 ; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>* diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index e7a57fe18099..762950355594 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -20,8 +20,8 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { ; O1-LABEL: @enabled( ; O1-NEXT: entry: -; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O1-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -137,8 +137,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; O2-LABEL: @enabled( ; O2-NEXT: entry: -; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -254,8 +254,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; O3-LABEL: @enabled( ; O3-NEXT: entry: -; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -371,8 +371,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; O3DEFAULT-LABEL: @enabled( ; O3DEFAULT-NEXT: entry: -; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -488,8 +488,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; Os-LABEL: @enabled( ; Os-NEXT: entry: -; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -605,8 +605,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; Oz-LABEL: @enabled( ; Oz-NEXT: entry: -; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; Oz-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -722,8 +722,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; O1VEC2-LABEL: @enabled( ; O1VEC2-NEXT: entry: -; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O1VEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -839,8 +839,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; OzVEC2-LABEL: @enabled( ; OzVEC2-NEXT: entry: -; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; OzVEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -956,8 +956,8 @@ define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly % ; ; O3DIS-LABEL: @enabled( ; O3DIS-NEXT: entry: -; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O3DIS-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1110,8 +1110,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; ; O2-LABEL: @nopragma( ; O2-NEXT: entry: -; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1227,8 +1227,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; ; O3-LABEL: @nopragma( ; O3-NEXT: entry: -; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1344,8 +1344,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; ; O3DEFAULT-LABEL: @nopragma( ; O3DEFAULT-NEXT: entry: -; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1461,8 +1461,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; ; Os-LABEL: @nopragma( ; Os-NEXT: entry: -; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1597,8 +1597,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O1VEC2-NEXT: entry: ; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; O1VEC2: vector.ph: -; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]] ; O1VEC2: vector.body: ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1614,7 +1614,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O1VEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 ; O1VEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; O1VEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; O1VEC2: middle.block: ; O1VEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 ; O1VEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1630,7 +1630,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; O1VEC2: for.end: ; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 ; O1VEC2-NEXT: ret i32 [[TMP10]] @@ -1639,8 +1639,8 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; OzVEC2-NEXT: entry: ; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; OzVEC2: vector.ph: -; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 -; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]] ; OzVEC2: vector.body: ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1656,7 +1656,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; OzVEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 ; OzVEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; OzVEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; OzVEC2: middle.block: ; OzVEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 ; OzVEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1672,7 +1672,7 @@ define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly ; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; OzVEC2: for.end: ; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 ; OzVEC2-NEXT: ret i32 [[TMP10]] @@ -1726,7 +1726,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; O1: for.end: ; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; O1-NEXT: ret i32 [[TMP1]] @@ -1743,7 +1743,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; O2: for.end: ; O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; O2-NEXT: ret i32 [[TMP1]] @@ -1760,7 +1760,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; O3: for.end: ; O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; O3-NEXT: ret i32 [[TMP1]] @@ -1866,7 +1866,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; Os-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; Os: for.end: ; Os-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; Os-NEXT: ret i32 [[TMP1]] @@ -1883,7 +1883,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; Oz: for.end: ; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; Oz-NEXT: ret i32 [[TMP1]] @@ -1900,7 +1900,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4 +; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; O1VEC2: for.end: ; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; O1VEC2-NEXT: ret i32 [[TMP1]] @@ -1917,7 +1917,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4 +; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; OzVEC2: for.end: ; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; OzVEC2-NEXT: ret i32 [[TMP1]] @@ -1934,7 +1934,7 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 ; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 +; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; O3DIS: for.end: ; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 ; O3DIS-NEXT: ret i32 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index e1528ae6f09f..a2a85bb0206f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -17,8 +17,8 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], @@ -32,7 +32,7 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -47,7 +47,7 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; @@ -58,8 +58,8 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> undef, i32 [[INDEX]], i32 0 -; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> undef, <32 x i32> zeroinitializer +; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 +; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], ; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], @@ -73,7 +73,7 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: @@ -88,7 +88,7 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1 ; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 ; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; AUTOVF: for.end: ; AUTOVF-NEXT: ret i32 0 ; @@ -121,8 +121,8 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], @@ -136,7 +136,7 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -151,7 +151,7 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; @@ -162,8 +162,8 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> undef, i32 [[INDEX]], i32 0 -; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> undef, <32 x i32> zeroinitializer +; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 +; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], ; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], @@ -177,7 +177,7 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: @@ -192,7 +192,7 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: store i8 [[DOT]], i8* [[ARRAYIDX]], align 1 ; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 ; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5 +; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; AUTOVF: for.end: ; AUTOVF-NEXT: ret i32 0 ; @@ -224,14 +224,103 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read ; CHECK-NEXT: for.body.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> undef, i32 [[K:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[K:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <64 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 9 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 10 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 11 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 12 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 13 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 14 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 15 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[INDEX]], 17 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 18 +; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[INDEX]], 19 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 20 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[INDEX]], 21 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 22 +; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 23 +; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 24 +; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 25 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 26 +; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[INDEX]], 27 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 28 +; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 29 +; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[INDEX]], 30 +; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[INDEX]], 31 +; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 32 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 33 +; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[INDEX]], 34 +; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 35 +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 36 +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 37 +; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[INDEX]], 38 +; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[INDEX]], 39 +; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 40 +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 41 +; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[INDEX]], 42 +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[INDEX]], 43 +; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 44 +; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[INDEX]], 45 +; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[INDEX]], 46 +; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[INDEX]], 47 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 48 +; CHECK-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 49 +; CHECK-NEXT: [[TMP50:%.*]] = add i32 [[INDEX]], 50 +; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[INDEX]], 51 +; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 52 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[INDEX]], 53 +; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[INDEX]], 54 +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[INDEX]], 55 +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 56 +; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[INDEX]], 57 +; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 58 +; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[INDEX]], 59 +; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 60 +; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 61 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[INDEX]], 62 +; CHECK-NEXT: [[TMP63:%.*]] = add i32 [[INDEX]], 63 +; CHECK-NEXT: [[TMP64:%.*]] = mul nsw <64 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <64 x i32> [[TMP64]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0i32(<64 x i32*> [[TMP65]], i32 4, <64 x i1> , <64 x i32> undef) +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[TMP66]], i32 0 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast i32* [[TMP67]] to <64 x i32>* +; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], <64 x i32>* [[TMP68]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP69:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP69]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[MUL]] +; CHECK-NEXT: [[TMP70:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_07]] +; CHECK-NEXT: store i32 [[TMP70]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: ret void ; @@ -239,14 +328,47 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read ; AUTOVF-NEXT: for.body.preheader: ; AUTOVF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AUTOVF: vector.ph: -; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[K:%.*]], i32 0 -; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i32 0 +; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: +; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; AUTOVF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; AUTOVF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 +; AUTOVF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; AUTOVF-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 +; AUTOVF-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 +; AUTOVF-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 +; AUTOVF-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 +; AUTOVF-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; AUTOVF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <8 x i32> [[TMP8]] +; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP9]], i32 4, <8 x i1> , <8 x i32> undef) +; AUTOVF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; AUTOVF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; AUTOVF-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <8 x i32>* +; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], <8 x i32>* [[TMP12]], align 4 +; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; AUTOVF-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; AUTOVF-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256 ; AUTOVF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: +; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] +; AUTOVF: for.body: +; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; AUTOVF-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] +; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[MUL]] +; AUTOVF-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; AUTOVF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_07]] +; AUTOVF-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX1]], align 4 +; AUTOVF-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 +; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 +; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; AUTOVF: for.end.loopexit: ; AUTOVF-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 3f22b56290ed..ea33f766a4f0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -19,8 +19,8 @@ ; ; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] @@ -48,8 +48,8 @@ ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body ; AVX-LABEL: vector.ph: -; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> undef, i32 %n, i32 0 -; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i32 0 +; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX-LABEL: vector.body: ; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll index b30246c21950..eb858b457706 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -17,8 +17,8 @@ define void @small_tc(float* noalias nocapture %A, float* noalias nocapture read ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 8818c39bac51..8f3a46d31beb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -82,8 +82,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] @@ -142,8 +142,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK: vector.ph10: ; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4 ; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[N_RND_UP11]], 8589934588 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <4 x i64> undef, i64 [[TMP19]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]] ; CHECK: vector.body9: ; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ] @@ -151,8 +151,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX14]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], ; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0 @@ -329,13 +329,13 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 @@ -534,8 +534,8 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 03a6149ae166..a9ca94760a8a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -13,8 +13,8 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], @@ -84,8 +84,8 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], @@ -167,14 +167,14 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index 2c2818d186dc..cf484e713c6e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -74,17 +74,17 @@ define i32 @uniform_load2(i32* align(4) %addr) { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ADDR:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ADDR]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ADDR]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ADDR]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]] ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]] @@ -614,17 +614,17 @@ define i32 @uniform_load_global() { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @GAddr, align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @GAddr, align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @GAddr, align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @GAddr, align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]] ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]] @@ -689,17 +689,17 @@ define i32 @uniform_load_constexpr() { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr (i32, i32* @GAddr, i64 5), align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT5]] ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT7]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index aa25eaa5cdab..4976253dce4a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -38,7 +38,7 @@ define void @vectorized(float* noalias nocapture %A, float* noalias nocapture re ; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !1 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP1:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 20, 16 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -55,7 +55,7 @@ define void @vectorized(float* noalias nocapture %A, float* noalias nocapture re ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -94,8 +94,8 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], @@ -112,7 +112,7 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP8]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -128,7 +128,7 @@ define void @vectorized1(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -166,8 +166,8 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] @@ -183,7 +183,7 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, 16 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -200,7 +200,7 @@ define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture r ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !11 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index 08dc1ba01ab8..5a0e8cf858cc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -33,8 +33,8 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] @@ -127,15 +127,15 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1( ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -144,16 +144,16 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP5]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.body: ; ENABLED_MASKED_STRIDED-NEXT: [[IX_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1016, [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_09]], [[CONV]] @@ -168,7 +168,7 @@ define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* no ; ENABLED_MASKED_STRIDED: for.inc: ; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_09]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -209,8 +209,8 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] @@ -303,15 +303,15 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize( ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -320,17 +320,17 @@ define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -397,10 +397,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ] @@ -495,7 +495,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -508,10 +508,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -522,17 +522,17 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -604,10 +604,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ] @@ -702,7 +702,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -715,10 +715,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -729,17 +729,17 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <24 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -843,7 +843,7 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -856,13 +856,13 @@ define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture read ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> , <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -914,8 +914,8 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] @@ -1008,7 +1008,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1020,28 +1020,28 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias noc ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1099,8 +1099,8 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no ; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2( ; DISABLED_MASKED_STRIDED-NEXT: entry: ; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] @@ -1418,15 +1418,15 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; ; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2( ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1435,10 +1435,10 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] @@ -1451,7 +1451,7 @@ define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* no ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !9 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1524,10 +1524,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ] @@ -1847,7 +1847,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1859,10 +1859,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1873,10 +1873,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] @@ -1889,7 +1889,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !10 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1968,8 +1968,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca ; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] @@ -2287,7 +2287,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !9 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2299,22 +2299,22 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] @@ -2326,7 +2326,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !11 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 7a57af342a8b..d8229dc42634 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -460,8 +460,8 @@ for.end: ; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]] ; INTER-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>* ; INTER-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1 -; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> -; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> undef, <4 x i32> +; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> +; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> ; INTER-NEXT: [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]] ; INTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]] ; INTER-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>* diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 0dba201e73fb..a9ddf653b77a 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -14,8 +14,8 @@ define dso_local void @constTC(i32* noalias nocapture %A) optsize { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 82819a5db077..c40742fda0d8 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -18,8 +18,8 @@ define dso_local void @alignTC(i32* noalias nocapture %A, i32 %n) optsize { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 1407abce979e..57e403d276c1 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -8,8 +8,8 @@ define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 { ; CHECK-LABEL: vector.ph: -; CHECK: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0 -; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 +; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3 ; CHECK-NEXT: br label %vector.body @@ -62,8 +62,8 @@ exit: ; and not introduce traps on additional paths. define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 { ; CHECK-LABEL: vector.ph: -; CHECK: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0 -; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 +; CHECK-NEXT: %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3 ; CHECK-NEXT: br label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 30c0387f8192..c5fa1835aada 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -15,15 +15,15 @@ ; VEC4_INTERL1-LABEL: @fp_iv_loop1( ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float %fpinc, i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], ; VEC4_INTERL1-NEXT: [[INDUCTION4:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP5]] ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: br label %vector.body ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] @@ -37,15 +37,15 @@ ; VEC4_INTERL2-LABEL: @fp_iv_loop1( ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0 -; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0 -; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0 +; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> poison, float %fpinc, i32 0 +; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[DOTSPLAT4]], ; VEC4_INTERL2-NEXT: [[INDUCTION5:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP5]] ; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float %fpinc, 4.000000e+00 -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0 -; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: br label %vector.body ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] @@ -115,8 +115,8 @@ for.end: ; preds = %for.end.loopexit, % ; VEC4_INTERL1-LABEL: @fp_iv_loop2( ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[INDUCTION2:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], ; VEC4_INTERL1-NEXT: br label %vector.body ; VEC4_INTERL1: vector.body: @@ -172,17 +172,17 @@ for.end: ; preds = %for.end.loopexit, % ; VEC4_INTERL1: for.body.lr.ph: ; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float %init, i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], ; VEC4_INTERL1-NEXT: [[INDUCTION7:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP7]] ; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> undef, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 -; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT12]], <4 x float> undef, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> poison, float [[TMP8]], i32 0 +; VEC4_INTERL1-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT12]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] diff --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll index 142e4afbf8e3..4f3648404c18 100644 --- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll @@ -50,8 +50,8 @@ define float @minloopattr(float* nocapture readonly %arg) #0 { ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[T]], i32 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i32 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -66,7 +66,7 @@ define float @minloopattr(float* nocapture readonly %arg) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP5]], [[RDX_SHUF]] @@ -90,7 +90,7 @@ define float @minloopattr(float* nocapture readonly %arg) #0 { ; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]] ; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1 ; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537 -; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] ; CHECK: out: ; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[T6_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 5ad5457dd802..795df0caedd6 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -451,8 +451,8 @@ define void @minimal_bit_widths(i1 %c) { ; ; VEC-LABEL: @minimal_bit_widths( ; VEC-NEXT: entry: -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> undef, i1 [[C:%.*]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> undef, <2 x i32> zeroinitializer +; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0 +; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 669746aa1961..5899019e0adb 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -15,15 +15,15 @@ ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @int_inc, align 4 ; CHECK: vector.ph: -; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %init, i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] ; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -86,15 +86,15 @@ for.end: ; preds = %for.end.loopexit, % ; CHECK-LABEL: @induction_with_loop_inv( ; CHECK: vector.ph: -; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 %j.012, i32 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %x.011, i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 %j.012, i32 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] ; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 %j.012, 8 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -161,13 +161,13 @@ for.end6: ; preds = %for.end6.loopexit, ; CHECK-LABEL: @non_primary_iv_loop_inv_trunc( ; CHECK: vector.ph: ; CHECK: [[TMP3:%.*]] = trunc i64 %step to i32 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT6]] ; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index db1234d594ff..e4b14b520860 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -702,8 +702,8 @@ exit: ; CHECK-LABEL: @nonprimary( ; CHECK: vector.ph: -; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 -; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0 +; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK: %[[START:.*]] = add <2 x i32> %[[SPLAT]], ; CHECK: vector.body: ; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -721,8 +721,8 @@ exit: ; ; IND-LABEL: @nonprimary( ; IND: vector.ph: -; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 -; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; IND: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0 +; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], ; IND: vector.body: ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -739,8 +739,8 @@ exit: ; ; UNROLL-LABEL: @nonprimary( ; UNROLL: vector.ph: -; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0 -; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> poison, i32 %i, i32 0 +; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], ; UNROLL: vector.body: ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll index 1d487bfb89c1..3910973ada6f 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-1.ll @@ -37,8 +37,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: vector.body: ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4 -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> %class.Complex = type { float, float } diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index c647f586b18e..803ee72dea97 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -18,7 +18,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; ; CHECK: vector.body: ; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}} -; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> +; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> poison, <2 x i32> ; ; CHECK: pred.store.if ; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0 @@ -67,7 +67,7 @@ for.end: ; ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}} -; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> +; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> ; ; CHECK: pred.store.if ; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0 @@ -126,7 +126,7 @@ for.end: ; ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}} -; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> +; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> ; CHECK: store i64 %x, {{.*}} ; CHECK: store i64 %x, {{.*}} ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index f7a02d613af1..3e0308bb00f4 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -18,8 +18,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: @test_array_load2_store2( ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: add nsw <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> @@ -69,9 +69,9 @@ for.end: ; preds = %for.body ; CHECK-LABEL: @test_struct_array_load3_store3( ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4 -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> ; CHECK: add nsw <4 x i32> {{.*}}, ; CHECK: add nsw <4 x i32> {{.*}}, ; CHECK: add nsw <4 x i32> {{.*}}, @@ -147,10 +147,10 @@ define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.ST4* [[S:%.*]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[STRIDED_VEC2]] ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC3]] @@ -274,9 +274,9 @@ for.body: ; preds = %for.body, %entry ; CHECK: %[[G1:.+]] = getelementptr inbounds i32, i32* %[[G0]], i64 -6 ; CHECK: %[[B0:.+]] = bitcast i32* %[[G1]] to <8 x i32>* ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 4 -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> ; CHECK: add nsw <4 x i32> ; CHECK: sub nsw <4 x i32> @@ -328,7 +328,7 @@ for.body: ; preds = %for.body, %entry ; CHECK-LABEL: @even_load_static_tc( ; CHECK: vector.body: ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 -; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: icmp eq i64 %index.next, 508 ; CHECK: middle.block: ; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph @@ -371,7 +371,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %n.vec = sub i64 %[[N]], %[[R]] ; CHECK: vector.body: ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 -; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: icmp eq i64 %index.next, %n.vec ; CHECK: middle.block: ; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph @@ -416,7 +416,7 @@ for.body: ; preds = %for.body, %entry ; CHECK-LABEL: @load_gap_reverse( ; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8 -; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> +; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> poison, <4 x i32> %pair = type { i64, i64 } define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) { @@ -451,8 +451,8 @@ for.exit: ; CHECK-LABEL: @mixed_load2_store2( ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4 -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> -; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> +; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> ; CHECK: store <8 x i32> %interleaved.vec @@ -495,9 +495,9 @@ for.body: ; preds = %for.body, %entry ; CHECK-LABEL: @mixed_load3_store3( ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4 -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> -; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> +; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> poison, <4 x i32> ; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4 @@ -552,8 +552,8 @@ for.body: ; preds = %for.body, %entry ; CHECK-LABEL: @int_float_struct( ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 -; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> -; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> +; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> +; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> poison, <4 x i32> ; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float> ; CHECK: add <4 x i32> ; CHECK: fadd fast <4 x float> @@ -670,7 +670,7 @@ for.end: ; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6 ; CHECK: store i32 %[[X4:.+]], {{.*}} ; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}} -; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> +; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> poison, <4 x i32> ; CHECK: add <4 x i32> %[[S1]], %[[Phi]] define i32 @PR27626_1(%pair.i32 *%p, i64 %n) { @@ -771,7 +771,7 @@ for.end: ; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6 ; CHECK: store i32 %[[X4:.+]], {{.*}} ; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}} -; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> +; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> poison, <4 x i32> ; CHECK: add <4 x i32> %[[S1]], %[[Phi]] define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) { @@ -812,10 +812,10 @@ for.end: ; CHECK-LABEL: @PR27626_4( ; CHECK: vector.ph: -; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> undef, i32 %y, i32 0 -; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> undef, i32 %z, i32 0 -; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> poison, i32 %y, i32 0 +; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> poison, i32 %z, i32 0 +; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK: store i32 %x, {{.*}} ; CHECK: store i32 %x, {{.*}} @@ -905,8 +905,8 @@ for.end: ; CHECK: vector.body: ; CHECK: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[VSHUF1:.+]], %vector.body ] ; CHECK: %wide.vec = load <8 x i16> -; CHECK: %[[VSHUF0:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> -; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> +; CHECK: %[[VSHUF0:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> +; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> ; CHECK: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %[[VSHUF1]], <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF0]] to <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index cc4c55c3f444..1ac402000f2d 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -183,10 +183,10 @@ for.end: ; preds = %for.body ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[K:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -289,10 +289,10 @@ for.end: ; preds = %for.body ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> undef, i1 [[CMP]], i32 3 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[K]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i32 3 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 6f4af4650f80..f93c038de6bb 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -55,8 +55,8 @@ define void @bottom_tested(i16* %p, i32 %n) { ; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] ; TAILFOLD: vector.body: ; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll index f5d7f5ba2474..32e85724b4b2 100644 --- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -11,8 +11,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Turn this into a max reduction. Make sure we use a splat to initialize the ; vector for the reduction. ; CHECK-LABEL: @max_red( -; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0 -; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK: %[[VAR:.*]] = insertelement <2 x i32> poison, i32 %max, i32 0 +; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK: icmp sgt <2 x i32> ; CHECK: select <2 x i1> ; CHECK: middle.block diff --git a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll index aa1c429185ce..74fc8128def0 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll @@ -68,8 +68,8 @@ define void @Test(%struct.s* nocapture %obj, i64 %z) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4, !alias.scope !0 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4, !alias.scope !3 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 8f5ffb112836..67a2cfd00b24 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -122,6 +122,30 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; CHECK: for.end: ; CHECK-NEXT: ret void ; +; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f1( +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.body.preheader: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.body: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB:%.*]], i64 [[INDVARS_IV]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC:%.*]], i64 [[INDVARS_IV]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[TMP1]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA:%.*]], i64 [[INDVARS_IV]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END]] +; CHECK-PROFITABLE-BY-DEFAULT: for.end: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void +; entry: %cmp1 = icmp sgt i32 %N, 0 br i1 %cmp1, label %for.body.preheader, label %for.end @@ -213,7 +237,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 -3 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP24]], align 4, !alias.scope !13 -; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = fadd fast <4 x float> [[REVERSE]], ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 0 @@ -249,7 +273,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 -3 ; CHECK-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP38]], align 4 -; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x float> [[WIDE_LOAD16]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = fadd fast <4 x float> [[REVERSE17]], ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP30]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0 @@ -287,6 +311,34 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; +; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f2( +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.body.preheader: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.body: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[I_014:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = xor i32 [[I_014]], -1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[SUB2:%.*]] = add i32 [[TMP1]], [[N]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[IDXPROM]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP2]], 1.000000e+00 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END]] +; CHECK-PROFITABLE-BY-DEFAULT: for.end: +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret i32 0 +; entry: %cmp1 = icmp sgt i32 %n, 1 br i1 %cmp1, label %for.body.preheader, label %for.end @@ -320,6 +372,69 @@ for.end: ; preds = %for.end.loopexit, % } define void @f3(i8* noalias %A, i64 %n) { +; CHECK-LABEL: @f3( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> , <4 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP21:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> , <4 x i8>* [[TMP8]], align 1 +; CHECK-NEXT: [[INDEX_NEXT5]] = add i64 [[INDEX4]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IV]] +; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP23:!llvm.loop !.*]] +; CHECK: for.end.loopexit.loopexit: +; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; ; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f3( ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: iter.check: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index ddc4747141fd..67b8b84e0305 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -229,8 +229,8 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[BSTRIDE:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 386c9ec40d02..5d8418ba9f75 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -15,8 +15,8 @@ ; ; RUN: opt -S -loop-vectorize -enable-vplan-native-path < %s | FileCheck %s ; CHECK-LABEL: vector.ph: -; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0 -; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll index 18eca4c95894..5a11cc531c2c 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll @@ -16,10 +16,10 @@ ; RUN: opt -S -loop-vectorize -enable-vplan-native-path < %s | FileCheck %s ; CHECK: %[[ZeroTripChk:.*]] = icmp sgt i32 %jCount, 0 ; CHECK-LABEL: vector.ph: -; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 -; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> undef, i1 %[[ZeroTripChk]], i32 0 -; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> undef, <4 x i32> zeroinitializer +; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0 +; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> poison, i1 %[[ZeroTripChk]], i32 0 +; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index c09f35256967..bed3ba555045 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -6,15 +6,23 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; Function Attrs: nofree norecurse nounwind define void @a(i8* readnone %b) { ; CHECK-LABEL: @a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = ptrtoint i8* [[B:%.*]] to i64 +; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq i8* [[B]], null +; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0:%.*]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ null, %vector.ph ], [ [[PTR_IND:%.*]], %pred.store.continue7 ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue7 ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP2]], i64 -1 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 0 @@ -22,14 +30,64 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP5]], i32 -3 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 -; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> undef, <4 x i32> +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 0 +; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; CHECK: pred.store.if2: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 1 +; CHECK-NEXT: store i8 95, i8* [[TMP13]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] +; CHECK: pred.store.continue3: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; CHECK: pred.store.if4: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 2 +; CHECK-NEXT: store i8 95, i8* [[TMP15]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] +; CHECK: pred.store.continue5: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] +; CHECK: pred.store.if6: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8*> [[TMP3]], i32 3 +; CHECK-NEXT: store i8 95, i8* [[TMP17]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] ; CHECK: pred.store.continue7: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[C_05:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[C_05]], i64 -1 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i8 95, i8* [[INCDEC_PTR]], align 1 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[B]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; entry: %cmp.not4 = icmp eq i8* %b, null diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll index d07e35687932..6d710d38fad0 100644 --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -34,8 +34,8 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read ; CHECK-NEXT: for.body.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[K:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index b191d0133e3a..125fb0a4f605 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -18,13 +18,13 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> undef, i16 [[TMP2]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index 0d61a8773613..c01a9f9779f0 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -25,22 +25,22 @@ define void @test(i16 %x, i64 %y, i32* %ptr) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INC]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 @@ -58,10 +58,10 @@ define void @test(i16 %x, i64 %y, i32* %ptr) { ; CHECK-NEXT: [[OFFSET_IDX7:%.*]] = mul i64 [[INDEX]], [[INC]] ; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[OFFSET_IDX7]] to i8 ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[INC]] to i8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i8> undef, i8 [[TMP9]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT8]], <2 x i8> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> undef, i8 [[TMP10]], i32 0 -; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0 +; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = mul <2 x i8> , [[DOTSPLAT11]] ; CHECK-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[BROADCAST_SPLAT9]], [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i8 0, [[TMP10]] @@ -70,6 +70,34 @@ define void @test(i16 %x, i64 %y, i32* %ptr) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = trunc i64 [[IV]] to i8 +; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1 +; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]] +; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK: loop.exit: +; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]] +; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1 +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[IV_NEXT_1:%.*]], [[LOOP_2]] ], [ 0, [[LOOP_EXIT]] ] +; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_1]], [[V1]] +; CHECK-NEXT: call void @use(i64 [[IV_NEXT_1]]) +; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_NEXT_1]], 200 +; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2]], label [[LOOP_2_EXIT:%.*]] +; CHECK: loop.2.exit: +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: %conv19 = sext i16 %x to i64 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index 692b6e5c4ce1..aeda1800e21f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -18,7 +18,7 @@ ; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index ; CHECKUF1: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to * ; CHECKUF1: %wide.load = load , * %[[IDXB_CAST]], align 8, !alias.scope !0 -; CHECKUF1: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) +; CHECKUF1: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer) ; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index ; CHECKUF1: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to * ; CHECKUF1: store %[[FADD]], * %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0 @@ -55,8 +55,8 @@ ; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXB]], i64 %[[VSCALE2_EXT]] ; CHECKUF2: %[[IDXB_NEXT_CAST:.*]] = bitcast double* %[[IDXB_NEXT]] to * ; CHECKUF2: %wide.load{{[0-9]+}} = load , * %[[IDXB_NEXT_CAST]], align 8, !alias.scope !0 -; CHECKUF2: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) -; CHECKUF2: %[[FADD_NEXT:.*]] = fadd %wide.load{{[0-9]+}}, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) +; CHECKUF2: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer) +; CHECKUF2: %[[FADD_NEXT:.*]] = fadd %wide.load{{[0-9]+}}, shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer) ; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index ; CHECKUF2: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to * ; CHECKUF2: store %[[FADD]], * %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0 diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 4c3ad8ee8aa9..bc73494d6a57 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -19,19 +19,19 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[EXTRA_ITER]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index bd79499d5d34..7e531bd07729 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -35,8 +35,8 @@ for.end: ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, i64 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>* ; CHECK-NEXT: store <4 x i32*> [[DOTSPLAT]], <4 x i32*>* [[TMP3]], align 8 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll index 534eb59b6bc2..9ba0f30ba142 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll @@ -5,236 +5,236 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x double>* %C.Ptr) { ; CHECK-LABEL: @transpose_multiply( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double* +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6 +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[VEC_GEP3]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST4]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double* +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3 +; CHECK-NEXT: [[VEC_CAST9:%.*]] = bitcast double* [[VEC_GEP8]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST9]], align 8 +; CHECK-NEXT: [[VEC_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6 +; CHECK-NEXT: [[VEC_CAST12:%.*]] = bitcast double* [[VEC_GEP11]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST12]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> undef, double [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> [[TMP5]], double [[TMP6]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> undef, double [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> [[TMP11]], double [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> undef, double [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2 +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] +; CHECK-NEXT: [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]] +; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] +; CHECK-NEXT: [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> +; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] +; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]] +; CHECK-NEXT: [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]] +; CHECK-NEXT: [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> +; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]] +; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]] +; CHECK-NEXT: [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]] +; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] +; CHECK-NEXT: [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> +; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]] +; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]] +; CHECK-NEXT: [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]] +; CHECK-NEXT: [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> +; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]] +; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]] +; CHECK-NEXT: [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]] +; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]] +; CHECK-NEXT: [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> +; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]] +; CHECK-NEXT: [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]] +; CHECK-NEXT: [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]] +; CHECK-NEXT: [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]] +; CHECK-NEXT: [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]] +; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> +; CHECK-NEXT: [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]] +; CHECK-NEXT: [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]] +; CHECK-NEXT: [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]] +; CHECK-NEXT: [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> +; CHECK-NEXT: [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]] +; CHECK-NEXT: [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]] +; CHECK-NEXT: [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]] +; CHECK-NEXT: [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]] +; CHECK-NEXT: [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> +; CHECK-NEXT: [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]] +; CHECK-NEXT: [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]] +; CHECK-NEXT: [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]] +; CHECK-NEXT: [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]] +; CHECK-NEXT: [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double* +; CHECK-NEXT: [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP49]], <3 x double>* [[VEC_CAST92]], align 8 +; CHECK-NEXT: [[VEC_GEP93:%.*]] = getelementptr double, double* [[TMP110]], i64 3 +; CHECK-NEXT: [[VEC_CAST94:%.*]] = bitcast double* [[VEC_GEP93]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP79]], <3 x double>* [[VEC_CAST94]], align 8 +; CHECK-NEXT: [[VEC_GEP95:%.*]] = getelementptr double, double* [[TMP110]], i64 6 +; CHECK-NEXT: [[VEC_CAST96:%.*]] = bitcast double* [[VEC_GEP95]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP109]], <3 x double>* [[VEC_CAST96]], align 8 +; CHECK-NEXT: ret void +; ; Load columns of input matrixes %A and %B. -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double* -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8 -; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3 -; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8 -; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6 -; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double* -; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8 -; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3 -; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8 -; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6 -; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8 ; Transpose %A. -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 ; Lower multiply(transpose(%A), %B) -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] -; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] -; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> -; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] -; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] -; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] -; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> -; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] -; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] -; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]] -; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] -; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> -; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] -; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] -; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]] -; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] -; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> -; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] -; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] -; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]] -; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] -; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> -; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] -; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] -; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]] -; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] -; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> -; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] -; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] -; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]] -; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] -; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> -; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] -; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] -; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]] -; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] -; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> -; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double [[TMP98]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] -; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] -; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]] -; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] -; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]] -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> ; Store result columns. -; CHECK-NEXT: [[TMP108:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double* -; CHECK-NEXT: [[TMP109:%.*]] = bitcast double* [[TMP108]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP47]], <3 x double>* [[TMP109]], align 8 -; CHECK-NEXT: [[TMP110:%.*]] = getelementptr double, double* [[TMP108]], i64 3 -; CHECK-NEXT: [[TMP111:%.*]] = bitcast double* [[TMP110]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP77]], <3 x double>* [[TMP111]], align 8 -; CHECK-NEXT: [[TMP112:%.*]] = getelementptr double, double* [[TMP108]], i64 6 -; CHECK-NEXT: [[TMP113:%.*]] = bitcast double* [[TMP112]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP107]], <3 x double>* [[TMP113]], align 8 -; CHECK-NEXT: ret void -; entry: %a = load <9 x double>, <9 x double>* %A.Ptr, align 8 @@ -251,255 +251,256 @@ declare <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double>, <9 x define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x double>* %C.Ptr) { ; CHECK-LABEL: @transpose_multiply_add( ; CHECK-NEXT: entry: - ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double* -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8 -; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3 -; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8 -; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6 -; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8 +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP0]], i64 3 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i64 6 +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[VEC_GEP3]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST4]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double* -; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8 -; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3 -; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8 -; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6 -; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i64 3 +; CHECK-NEXT: [[VEC_CAST9:%.*]] = bitcast double* [[VEC_GEP8]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST9]], align 8 +; CHECK-NEXT: [[VEC_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i64 6 +; CHECK-NEXT: [[VEC_CAST12:%.*]] = bitcast double* [[VEC_GEP11]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST12]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> undef, double [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> [[TMP5]], double [[TMP6]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> undef, double [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> [[TMP11]], double [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> undef, double [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2 +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] +; CHECK-NEXT: [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]] +; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] +; CHECK-NEXT: [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> +; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] +; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]] +; CHECK-NEXT: [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]] +; CHECK-NEXT: [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> +; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]] +; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]] +; CHECK-NEXT: [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]] +; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] +; CHECK-NEXT: [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> +; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]] +; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]] +; CHECK-NEXT: [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]] +; CHECK-NEXT: [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> +; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]] +; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]] +; CHECK-NEXT: [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]] +; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]] +; CHECK-NEXT: [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> +; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]] +; CHECK-NEXT: [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]] +; CHECK-NEXT: [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]] +; CHECK-NEXT: [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]] +; CHECK-NEXT: [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]] +; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> +; CHECK-NEXT: [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]] +; CHECK-NEXT: [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]] +; CHECK-NEXT: [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]] +; CHECK-NEXT: [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> +; CHECK-NEXT: [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]] +; CHECK-NEXT: [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]] +; CHECK-NEXT: [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]] +; CHECK-NEXT: [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]] +; CHECK-NEXT: [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> +; CHECK-NEXT: [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]] +; CHECK-NEXT: [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]] +; CHECK-NEXT: [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]] +; CHECK-NEXT: [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]] +; CHECK-NEXT: [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double* +; CHECK-NEXT: [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST92]], align 8 +; CHECK-NEXT: [[VEC_GEP94:%.*]] = getelementptr double, double* [[TMP110]], i64 3 +; CHECK-NEXT: [[VEC_CAST95:%.*]] = bitcast double* [[VEC_GEP94]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD96:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST95]], align 8 +; CHECK-NEXT: [[VEC_GEP97:%.*]] = getelementptr double, double* [[TMP110]], i64 6 +; CHECK-NEXT: [[VEC_CAST98:%.*]] = bitcast double* [[VEC_GEP97]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST98]], align 8 +; CHECK-NEXT: [[TMP111:%.*]] = fadd <3 x double> [[COL_LOAD93]], [[TMP49]] +; CHECK-NEXT: [[TMP112:%.*]] = fadd <3 x double> [[COL_LOAD96]], [[TMP79]] +; CHECK-NEXT: [[TMP113:%.*]] = fadd <3 x double> [[COL_LOAD99]], [[TMP109]] +; CHECK-NEXT: [[TMP114:%.*]] = bitcast <9 x double>* [[C_PTR]] to double* +; CHECK-NEXT: [[VEC_CAST100:%.*]] = bitcast double* [[TMP114]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP111]], <3 x double>* [[VEC_CAST100]], align 8 +; CHECK-NEXT: [[VEC_GEP101:%.*]] = getelementptr double, double* [[TMP114]], i64 3 +; CHECK-NEXT: [[VEC_CAST102:%.*]] = bitcast double* [[VEC_GEP101]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP112]], <3 x double>* [[VEC_CAST102]], align 8 +; CHECK-NEXT: [[VEC_GEP103:%.*]] = getelementptr double, double* [[TMP114]], i64 6 +; CHECK-NEXT: [[VEC_CAST104:%.*]] = bitcast double* [[VEC_GEP103]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP113]], <3 x double>* [[VEC_CAST104]], align 8 +; CHECK-NEXT: ret void +; + ; Transpose %A. -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 ; Lower multiply(transpose(%A), %B) -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] -; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] -; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> -; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] -; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] -; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] -; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> -; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] -; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] -; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]] -; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] -; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> -; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] -; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] -; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]] -; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] -; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> -; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] -; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] -; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]] -; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] -; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> -; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] -; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] -; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]] -; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] -; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> -; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] -; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] -; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]] -; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] -; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> -; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] -; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] -; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]] -; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] -; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> -; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double [[TMP98]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] -; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] -; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]] -; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] -; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]] ; Embed result of multiply into flat vector. -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> undef, <3 x i32> -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> ; Load %C. -; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double* -; CHECK-NEXT: [[COL_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST92]], align 8 -; CHECK-NEXT: [[COL_GEP94:%.*]] = getelementptr double, double* [[TMP110]], i64 3 -; CHECK-NEXT: [[COL_CAST95:%.*]] = bitcast double* [[COL_GEP94]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD96:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST95]], align 8 -; CHECK-NEXT: [[COL_GEP97:%.*]] = getelementptr double, double* [[TMP110]], i64 6 -; CHECK-NEXT: [[COL_CAST98:%.*]] = bitcast double* [[COL_GEP97]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST98]], align 8 ; Add column vectors. -; CHECK-NEXT: [[TMP108:%.*]] = fadd <3 x double> [[COL_LOAD93]], [[TMP47]] -; CHECK-NEXT: [[TMP109:%.*]] = fadd <3 x double> [[COL_LOAD96]], [[TMP77]] -; CHECK-NEXT: [[TMP110:%.*]] = fadd <3 x double> [[COL_LOAD99]], [[TMP107]] ; Store result columns. -; CHECK-NEXT: [[TMP111:%.*]] = bitcast <9 x double>* [[C_PTR]] to double* -; CHECK-NEXT: [[TMP112:%.*]] = bitcast double* [[TMP111]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP108]], <3 x double>* [[TMP112]], align 8 -; CHECK-NEXT: [[TMP113:%.*]] = getelementptr double, double* [[TMP111]], i64 3 -; CHECK-NEXT: [[TMP114:%.*]] = bitcast double* [[TMP113]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP109]], <3 x double>* [[TMP114]], align 8 -; CHECK-NEXT: [[TMP115:%.*]] = getelementptr double, double* [[TMP111]], i64 6 -; CHECK-NEXT: [[TMP116:%.*]] = bitcast double* [[TMP115]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP110]], <3 x double>* [[TMP116]], align 8 -; CHECK-NEXT: ret void -; + entry: %a = load <9 x double>, <9 x double>* %A.Ptr, align 8 %b = load <9 x double>, <9 x double>* %B.Ptr, align 8 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll index 69bc882caaa8..62432e9611af 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll @@ -17,52 +17,52 @@ define void @test(i32 %r, i32 %c) { ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i32 0, i32 0, i64 2) to <2 x double>*), align 8 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll index 5cbd9600592a..8ccf1dde632d 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll @@ -45,76 +45,76 @@ define void @multiply_sub_add_2x3_3x2(<6 x double>* %a.ptr, <6 x double>* %b.ptr ; RM-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST18]], align 8 ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP9:%.*]] = extractelement <3 x double> [[TMP2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP11:%.*]] = extractelement <3 x double> [[TMP2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP11]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP11]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP12:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]] ; RM-NEXT: [[TMP13:%.*]] = fadd <1 x double> [[TMP10]], [[TMP12]] ; RM-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[TMP2]], i64 2 -; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]] ; RM-NEXT: [[TMP16:%.*]] = fadd <1 x double> [[TMP13]], [[TMP15]] ; RM-NEXT: [[TMP17:%.*]] = shufflevector <1 x double> [[TMP16]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP17]], <2 x i32> ; RM-NEXT: [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP19:%.*]] = extractelement <3 x double> [[TMP2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> undef, double [[TMP19]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP19]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP20:%.*]] = fmul <1 x double> [[SPLAT_SPLAT27]], [[BLOCK25]] ; RM-NEXT: [[BLOCK28:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP21:%.*]] = extractelement <3 x double> [[TMP2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT30]], [[BLOCK28]] ; RM-NEXT: [[TMP23:%.*]] = fadd <1 x double> [[TMP20]], [[TMP22]] ; RM-NEXT: [[BLOCK31:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP24:%.*]] = extractelement <3 x double> [[TMP2]], i64 2 -; RM-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> undef, double [[TMP24]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP25:%.*]] = fmul <1 x double> [[SPLAT_SPLAT33]], [[BLOCK31]] ; RM-NEXT: [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]] ; RM-NEXT: [[TMP27:%.*]] = shufflevector <1 x double> [[TMP26]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP18]], <2 x double> [[TMP27]], <2 x i32> ; RM-NEXT: [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP29:%.*]] = extractelement <3 x double> [[TMP3]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> undef, double [[TMP29]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP29]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP30:%.*]] = fmul <1 x double> [[SPLAT_SPLAT36]], [[BLOCK34]] ; RM-NEXT: [[BLOCK37:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP31:%.*]] = extractelement <3 x double> [[TMP3]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> undef, double [[TMP31]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP31]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP32:%.*]] = fmul <1 x double> [[SPLAT_SPLAT39]], [[BLOCK37]] ; RM-NEXT: [[TMP33:%.*]] = fadd <1 x double> [[TMP30]], [[TMP32]] ; RM-NEXT: [[BLOCK40:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP34:%.*]] = extractelement <3 x double> [[TMP3]], i64 2 -; RM-NEXT: [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> undef, double [[TMP34]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP35:%.*]] = fmul <1 x double> [[SPLAT_SPLAT42]], [[BLOCK40]] ; RM-NEXT: [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]] ; RM-NEXT: [[TMP37:%.*]] = shufflevector <1 x double> [[TMP36]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP38:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP37]], <2 x i32> ; RM-NEXT: [[BLOCK43:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP39:%.*]] = extractelement <3 x double> [[TMP3]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x double> undef, double [[TMP39]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT44]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x double> poison, double [[TMP39]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT44]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP40:%.*]] = fmul <1 x double> [[SPLAT_SPLAT45]], [[BLOCK43]] ; RM-NEXT: [[BLOCK46:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP41:%.*]] = extractelement <3 x double> [[TMP3]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT47:%.*]] = insertelement <1 x double> undef, double [[TMP41]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT47]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT47:%.*]] = insertelement <1 x double> poison, double [[TMP41]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT47]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP42:%.*]] = fmul <1 x double> [[SPLAT_SPLAT48]], [[BLOCK46]] ; RM-NEXT: [[TMP43:%.*]] = fadd <1 x double> [[TMP40]], [[TMP42]] ; RM-NEXT: [[BLOCK49:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP44:%.*]] = extractelement <3 x double> [[TMP3]], i64 2 -; RM-NEXT: [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT51]], [[BLOCK49]] ; RM-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] ; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll index 1b1e8dcccfbd..ece51aa49682 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll @@ -12,49 +12,49 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP6]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll index 6245996827fa..90be5a8b92cf 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll @@ -12,49 +12,49 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP6]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll index 6f779964da0e..d70b28394992 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --verbose +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -lower-matrix-intrinsics -matrix-default-layout=row-major -S < %s | FileCheck --check-prefix=RM %s @@ -13,52 +13,52 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; RM-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]] ; RM-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] ; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; RM-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT9]], [[BLOCK7]] ; RM-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]] ; RM-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] ; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; RM-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]] ; RM-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]] ; RM-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] ; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]] ; RM-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]] ; RM-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] ; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> @@ -74,7 +74,6 @@ entry: declare <4 x double> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x double>, <4 x double>, i32, i32, i32) define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) { - ; RM-LABEL: @multiply_1x2( ; RM-NEXT: entry: ; RM-NEXT: [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <1 x i32> zeroinitializer @@ -82,29 +81,29 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) { ; RM-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> undef, <2 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> ; RM-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> undef, double [[TMP4]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[SPLAT_SPLAT5]], [[BLOCK3]] ; RM-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> ; RM-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[SPLAT_SPLAT8]], [[BLOCK6]] ; RM-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> ; RM-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> ; RM-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[SPLAT_SPLAT11]], [[BLOCK9]] ; RM-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> ; RM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> @@ -128,117 +127,117 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; RM-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <3 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT7]], [[BLOCK5]] ; RM-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] ; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> ; RM-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT10]], [[BLOCK8]] ; RM-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT13]], [[BLOCK11]] ; RM-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] ; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> ; RM-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT16]], [[BLOCK14]] ; RM-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT19]], [[BLOCK17]] ; RM-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] ; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> ; RM-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT22]], [[BLOCK20]] ; RM-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT25]], [[BLOCK23]] ; RM-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] ; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; RM-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[SPLAT_SPLAT28]], [[BLOCK26]] ; RM-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT31]], [[BLOCK29]] ; RM-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] ; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> ; RM-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> undef, double [[TMP35]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[SPLAT_SPLAT34]], [[BLOCK32]] ; RM-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> undef, double [[TMP37]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT37]], [[BLOCK35]] ; RM-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] ; RM-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> ; RM-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> undef, double [[TMP42]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[SPLAT_SPLAT40]], [[BLOCK38]] ; RM-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT43]], [[BLOCK41]] ; RM-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] ; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> ; RM-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> undef, double [[TMP49]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP50:%.*]] = fmul <1 x double> [[SPLAT_SPLAT46]], [[BLOCK44]] ; RM-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> undef, double [[TMP51]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[SPLAT_SPLAT49]], [[BLOCK47]] ; RM-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]] ; RM-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> ; RM-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> ; RM-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> undef, double [[TMP56]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP57:%.*]] = fmul <1 x double> [[SPLAT_SPLAT52]], [[BLOCK50]] ; RM-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> ; RM-NEXT: [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[SPLAT_SPLAT55]], [[BLOCK53]] ; RM-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]] ; RM-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll index 4e2e17e7769e..496234772b62 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll @@ -12,52 +12,52 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> @@ -80,29 +80,29 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) { ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> undef, double [[TMP4]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> @@ -126,117 +126,117 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]] ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]] ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> undef, double [[TMP35]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]] ; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> undef, double [[TMP37]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> undef, double [[TMP42]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]] ; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> undef, double [[TMP44]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> undef, double [[TMP49]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP50:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]] ; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> undef, double [[TMP51]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> undef, double [[TMP56]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP57:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]] ; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]] ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll index f9ac72937806..a4b37450417a 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll @@ -12,49 +12,49 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> undef, float [[TMP6]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> undef, float [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> undef, float [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> undef, float [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> undef, float [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll index 2e2cecfdd8cf..2663474c2a1e 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll @@ -12,49 +12,49 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> undef, float [[TMP6]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> undef, float [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> undef, float [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> undef, float [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> undef, float [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll index 427e42fbf689..9d53dc670c04 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll @@ -12,52 +12,52 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> undef, float [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> undef, float [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> undef, float [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> undef, float [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> undef, float [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> undef, float [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> undef, <2 x i32> @@ -80,29 +80,29 @@ define <4 x float> @multiply_1x2(<2 x float> %a, <2 x float> %b) { ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x float> [[B]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> undef, float [[TMP4]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT4]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT4]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x float> [[BLOCK3]], [[SPLAT_SPLAT5]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> undef, float [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT7]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT7]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x float> [[BLOCK6]], [[SPLAT_SPLAT8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT10]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT10]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP14]], <2 x i32> @@ -126,117 +126,117 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x float> [[B]], <6 x float> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT6]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT6]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x float> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x float> undef, float [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT9]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT9]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x float> [[BLOCK8]], [[SPLAT_SPLAT10]] ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x float> undef, float [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT12]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT12]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x float> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x float> undef, float [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT15]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT15]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x float> [[BLOCK14]], [[SPLAT_SPLAT16]] ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> undef, float [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x float> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x float> [[TMP13]], <3 x float> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> undef, float [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT21]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT21]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x float> [[BLOCK20]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> undef, float [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x float> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> undef, float [[TMP28]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT27]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP28]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT27]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x float> [[BLOCK26]], [[SPLAT_SPLAT28]] ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> undef, float [[TMP30]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP30]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x float> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x float> [[TMP29]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x float> [[TMP32]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x float> [[TMP27]], <3 x float> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> undef, float [[TMP35]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT33]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> poison, float [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT33]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = fmul <1 x float> [[BLOCK32]], [[SPLAT_SPLAT34]] ; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> undef, float [[TMP37]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> poison, float [[TMP37]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x float> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x float> [[TMP36]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x float> [[TMP39]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x float> [[TMP34]], <3 x float> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> undef, float [[TMP42]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT39]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> poison, float [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT39]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x float> [[BLOCK38]], [[SPLAT_SPLAT40]] ; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> undef, float [[TMP44]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> poison, float [[TMP44]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x float> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x float> [[TMP43]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x float> [[TMP46]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> undef, float [[TMP49]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT45]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> poison, float [[TMP49]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT45]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP50:%.*]] = fmul <1 x float> [[BLOCK44]], [[SPLAT_SPLAT46]] ; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> undef, float [[TMP51]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> poison, float [[TMP51]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x float> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x float> [[TMP50]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x float> [[TMP53]], <1 x float> undef, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x float> [[TMP48]], <3 x float> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> undef, float [[TMP56]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT51]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> poison, float [[TMP56]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT51]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP57:%.*]] = fmul <1 x float> [[BLOCK50]], [[SPLAT_SPLAT52]] ; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> undef, float [[TMP58]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> poison, float [[TMP58]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x float> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x float> [[TMP57]], [[TMP59]] ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x float> [[TMP60]], <1 x float> undef, <3 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll index d0503b7371d4..ebc3a266a29a 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll @@ -22,50 +22,50 @@ define void @multiply_noalias_4x4(<16 x double>* noalias %A, <16 x double>* noal ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP7:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <16 x double>, <16 x double>* [[A:%.*]], i64 0, i64 [[TMP3]] -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP4]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x double>, <16 x double>* [[A:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP4]], i64 4 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP2]], i64 4 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <16 x double>, <16 x double>* [[B:%.*]], i64 0, i64 [[TMP6]] -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP7]] to <2 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x double>, <16 x double>* [[B:%.*]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP7]], i64 4 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP5]], i64 4 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]]) ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP9]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP8]]) -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP7]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP6]]) +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP11]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP10]]) +; CHECK-NEXT: [[TMP9]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP8]]) ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP0:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_STEP]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <16 x double>, <16 x double>* [[C:%.*]], i64 0, i64 [[TMP13]] -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP14]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP14]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <16 x double>, <16 x double>* [[C:%.*]], i64 0, i64 [[TMP11]] +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP12]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP12]], i64 4 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -104,54 +104,54 @@ define void @multiply_noalias_2x4(<8 x i64>* noalias %A, <8 x i64>* noalias %B, ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INNER_IV]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP3]] -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP4]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[B:%.*]], i64 0, i64 [[TMP6]] -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP7]], i64 4 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 4 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP0]], [[TMP8]] +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]] ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]] -; CHECK-NEXT: [[TMP11]] = add <2 x i64> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]] -; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP1]], [[TMP12]] +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]] +; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]] +; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]] ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]] -; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]] +; CHECK-NEXT: [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]] ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP2:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_IV]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = shl i64 [[COLS_IV]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[C:%.*]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP18]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP18]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[COLS_IV]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]] +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 2 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -196,54 +196,54 @@ define void @multiply_noalias_4x2_2x8(<8 x i64>* noalias %A, <16 x i64>* noalias ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP3]] -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP4]], i64 4 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 4 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[COLS_IV]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <16 x i64>, <16 x i64>* [[B:%.*]], i64 0, i64 [[TMP6]] -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x i64>, <16 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP7]], i64 2 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP0]], [[TMP8]] +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]] ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]] -; CHECK-NEXT: [[TMP11]] = add <2 x i64> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]] -; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP1]], [[TMP12]] +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]] +; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]] +; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]] ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]] -; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]] +; CHECK-NEXT: [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]] ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop !3 +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP3:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_STEP]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = shl i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <32 x i64>, <32 x i64>* [[C:%.*]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP18]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP18]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <32 x i64>, <32 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]] +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 4 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -323,50 +323,50 @@ define void @multiply_alias_2x2(<4 x float>* %A, <4 x float>* %B, <4 x float>* % ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP23:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP19:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[INNER_IV]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP5]], i64 0, i64 [[TMP15]] -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast float* [[TMP16]] to <2 x float>* +; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP5]], i64 0, i64 [[TMP13]] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast float* [[TMP14]] to <2 x float>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 4 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, float* [[TMP16]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, float* [[TMP14]], i64 2 ; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast float* [[VEC_GEP]] to <2 x float>* ; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST8]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[COLS_IV]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[INNER_IV]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP11]], i64 0, i64 [[TMP18]] -; CHECK-NEXT: [[VEC_CAST11:%.*]] = bitcast float* [[TMP19]] to <2 x float>* +; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[COLS_IV]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP15]], [[INNER_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP11]], i64 0, i64 [[TMP16]] +; CHECK-NEXT: [[VEC_CAST11:%.*]] = bitcast float* [[TMP17]] to <2 x float>* ; CHECK-NEXT: [[COL_LOAD12:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST11]], align 4 -; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr float, float* [[TMP19]], i64 2 +; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr float, float* [[TMP17]], i64 2 ; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast float* [[VEC_GEP13]] to <2 x float>* ; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST14]], align 4 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[TMP12]]) +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP21]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP20]]) -; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[TMP13]]) +; CHECK-NEXT: [[TMP19]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP18]]) +; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]]) ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP23]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP22]]) +; CHECK-NEXT: [[TMP21]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP20]]) ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP5:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_IV]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[COLS_IV]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <4 x float>, <4 x float>* [[C]], i64 0, i64 [[TMP25]] -; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast float* [[TMP26]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST28]], align 8 -; CHECK-NEXT: [[VEC_GEP29:%.*]] = getelementptr float, float* [[TMP26]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[COLS_IV]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr <4 x float>, <4 x float>* [[C]], i64 0, i64 [[TMP23]] +; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast float* [[TMP24]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[VEC_CAST28]], align 8 +; CHECK-NEXT: [[VEC_GEP29:%.*]] = getelementptr float, float* [[TMP24]], i64 2 ; CHECK-NEXT: [[VEC_CAST30:%.*]] = bitcast float* [[VEC_GEP29]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP23]], <2 x float>* [[VEC_CAST30]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST30]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll index 37a1f487b308..e3320cbb9c68 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll @@ -9,27 +9,27 @@ target triple = "aarch64-apple-ios" define void @test(<6 x double> * %A, <6 x double> * %B, <9 x double>* %C, i1 %cond) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[COL_CAST196:%.*]] = bitcast <6 x double>* [[A:%.*]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD197:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST196]], align 8 -; CHECK-NEXT: [[COL_GEP198:%.*]] = getelementptr <6 x double>, <6 x double>* [[A]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST199:%.*]] = bitcast double* [[COL_GEP198]] to <3 x double>* -; CHECK-NEXT: [[COL_LOAD200:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST199]], align 8 -; CHECK-NEXT: [[COL_CAST201:%.*]] = bitcast <6 x double>* [[B:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD202:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST201]], align 8 -; CHECK-NEXT: [[COL_GEP203:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST204:%.*]] = bitcast double* [[COL_GEP203]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD205:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST204]], align 8 -; CHECK-NEXT: [[COL_GEP206:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST207:%.*]] = bitcast double* [[COL_GEP206]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD208:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST207]], align 8 -; CHECK-NEXT: [[ST_B:%.*]] = ptrtoint <9 x double>* [[C:%.*]] to i64 -; CHECK-NEXT: [[ST_E:%.*]] = add nuw nsw i64 [[ST_B]], 72 -; CHECK-NEXT: [[LD_B:%.*]] = ptrtoint <6 x double>* [[A]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ST_E]], [[LD_B]] +; CHECK-NEXT: [[VEC_CAST195:%.*]] = bitcast <6 x double>* [[A:%.*]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD196:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST195]], align 8 +; CHECK-NEXT: [[VEC_GEP197:%.*]] = getelementptr <6 x double>, <6 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST198:%.*]] = bitcast double* [[VEC_GEP197]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD199:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST198]], align 8 +; CHECK-NEXT: [[VEC_CAST200:%.*]] = bitcast <6 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD201:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST200]], align 8 +; CHECK-NEXT: [[VEC_GEP202:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST203:%.*]] = bitcast double* [[VEC_GEP202]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD204:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST203]], align 8 +; CHECK-NEXT: [[VEC_GEP205:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST206:%.*]] = bitcast double* [[VEC_GEP205]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD207:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST206]], align 8 +; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint <9 x double>* [[C:%.*]] to i64 +; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 72 +; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint <6 x double>* [[A]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] ; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] ; CHECK: alias_cont: -; CHECK-NEXT: [[LD_E:%.*]] = add nuw nsw i64 [[LD_B]], 48 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LD_E]], [[ST_B]] +; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 48 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] ; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] ; CHECK: copy: ; CHECK-NEXT: [[TMP2:%.*]] = alloca <6 x double>, align 64 @@ -39,239 +39,239 @@ define void @test(<6 x double> * %A, <6 x double> * %B, <9 x double>* %C, i1 %co ; CHECK-NEXT: br label [[NO_ALIAS]] ; CHECK: no_alias: ; CHECK-NEXT: [[TMP5:%.*]] = phi <6 x double>* [ [[A]], [[ENTRY:%.*]] ], [ [[A]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ] -; CHECK-NEXT: [[ST_B1:%.*]] = ptrtoint <9 x double>* [[C]] to i64 -; CHECK-NEXT: [[ST_E2:%.*]] = add nuw nsw i64 [[ST_B1]], 72 -; CHECK-NEXT: [[LD_B6:%.*]] = ptrtoint <6 x double>* [[B]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[ST_E2]], [[LD_B6]] -; CHECK-NEXT: br i1 [[TMP6]], label [[ALIAS_CONT3:%.*]], label [[NO_ALIAS5:%.*]] +; CHECK-NEXT: [[STORE_BEGIN4:%.*]] = ptrtoint <9 x double>* [[C]] to i64 +; CHECK-NEXT: [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 72 +; CHECK-NEXT: [[LOAD_BEGIN6:%.*]] = ptrtoint <6 x double>* [[B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]] +; CHECK-NEXT: br i1 [[TMP6]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]] ; CHECK: alias_cont1: -; CHECK-NEXT: [[LD_E7:%.*]] = add nuw nsw i64 [[LD_B6]], 48 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[LD_E7]], [[ST_B1]] -; CHECK-NEXT: br i1 [[TMP7]], label [[COPY4:%.*]], label [[NO_ALIAS5]] +; CHECK-NEXT: [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 48 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]] +; CHECK-NEXT: br i1 [[TMP7]], label [[COPY2:%.*]], label [[NO_ALIAS3]] ; CHECK: copy2: ; CHECK-NEXT: [[TMP8:%.*]] = alloca <6 x double>, align 64 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <6 x double>* [[TMP8]] to i8* ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <6 x double>* [[B]] to i8* ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 64 dereferenceable(48) [[TMP9]], i8* nonnull align 8 dereferenceable(48) [[TMP10]], i64 48, i1 false) -; CHECK-NEXT: br label [[NO_ALIAS5]] +; CHECK-NEXT: br label [[NO_ALIAS3]] ; CHECK: no_alias3: -; CHECK-NEXT: [[TMP11:%.*]] = phi <6 x double>* [ [[B]], [[NO_ALIAS]] ], [ [[B]], [[ALIAS_CONT3]] ], [ [[TMP8]], [[COPY4]] ] -; CHECK-NEXT: [[COL_CAST8:%.*]] = bitcast <6 x double>* [[TMP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST8]], align 8 -; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST9]], align 8 -; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast <6 x double>* [[TMP11]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST12]], align 8 -; CHECK-NEXT: [[COL_GEP14:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST15:%.*]] = bitcast double* [[COL_GEP14]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD16:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST15]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD13]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = phi <6 x double>* [ [[B]], [[NO_ALIAS]] ], [ [[B]], [[ALIAS_CONT1]] ], [ [[TMP8]], [[COPY2]] ] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <6 x double>* [[TMP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST8]], align 8 +; CHECK-NEXT: [[VEC_CAST11:%.*]] = bitcast <6 x double>* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD12:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST11]], align 8 +; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast double* [[VEC_GEP13]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST14]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD13]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD10]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP12]]) -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT22]] -; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD10]], <2 x double> [[SPLAT_SPLAT25]], <2 x double> [[TMP14]]) -; CHECK-NEXT: [[COL_CAST27:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[COL_CAST27]], align 8 -; CHECK-NEXT: [[COL_GEP28:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST29:%.*]] = bitcast double* [[COL_GEP28]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP15]], <2 x double>* [[COL_CAST29]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT18]], <2 x double> [[TMP12]]) +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT24]], <2 x double> [[TMP14]]) +; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[VEC_CAST26]], align 8 +; CHECK-NEXT: [[VEC_GEP27:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast double* [[VEC_GEP27]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP15]], <2 x double>* [[VEC_CAST28]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST31:%.*]] = bitcast double* [[TMP16]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD32:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST31]], align 8 -; CHECK-NEXT: [[COL_GEP33:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST34:%.*]] = bitcast double* [[COL_GEP33]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD35:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST34]], align 8 -; CHECK-NEXT: [[COL_CAST37:%.*]] = bitcast <6 x double>* [[TMP11]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD38:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST37]], align 8 -; CHECK-NEXT: [[COL_GEP39:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST40:%.*]] = bitcast double* [[COL_GEP39]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD41:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST40]], align 8 -; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = shufflevector <2 x double> [[COL_LOAD38]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[COL_LOAD32]], [[SPLAT_SPLATINSERT43]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = shufflevector <2 x double> [[COL_LOAD38]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> [[TMP17]]) -; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = shufflevector <2 x double> [[COL_LOAD41]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[COL_LOAD32]], [[SPLAT_SPLATINSERT49]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = shufflevector <2 x double> [[COL_LOAD41]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> [[TMP19]]) +; CHECK-NEXT: [[VEC_CAST30:%.*]] = bitcast double* [[TMP16]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD31:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST30]], align 8 +; CHECK-NEXT: [[VEC_GEP32:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST33:%.*]] = bitcast double* [[VEC_GEP32]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD34:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST33]], align 8 +; CHECK-NEXT: [[VEC_CAST36:%.*]] = bitcast <6 x double>* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD37:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST36]], align 8 +; CHECK-NEXT: [[VEC_GEP38:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST39:%.*]] = bitcast double* [[VEC_GEP38]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD40:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST39]], align 8 +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT42]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> [[TMP17]]) +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT48]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST55:%.*]] = bitcast double* [[TMP21]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP18]], <1 x double>* [[COL_CAST55]], align 8 -; CHECK-NEXT: [[COL_GEP56:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST57:%.*]] = bitcast double* [[COL_GEP56]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP20]], <1 x double>* [[COL_CAST57]], align 8 -; CHECK-NEXT: [[COL_CAST59:%.*]] = bitcast <6 x double>* [[TMP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST59]], align 8 -; CHECK-NEXT: [[COL_GEP61:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST62:%.*]] = bitcast double* [[COL_GEP61]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD63:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST62]], align 8 +; CHECK-NEXT: [[VEC_CAST54:%.*]] = bitcast double* [[TMP21]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP18]], <1 x double>* [[VEC_CAST54]], align 8 +; CHECK-NEXT: [[VEC_GEP55:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST56:%.*]] = bitcast double* [[VEC_GEP55]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP20]], <1 x double>* [[VEC_CAST56]], align 8 +; CHECK-NEXT: [[VEC_CAST58:%.*]] = bitcast <6 x double>* [[TMP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD59:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST58]], align 8 +; CHECK-NEXT: [[VEC_GEP60:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST61:%.*]] = bitcast double* [[VEC_GEP60]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD62:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST61]], align 8 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST65:%.*]] = bitcast double* [[TMP22]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD66:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST65]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT69:%.*]] = shufflevector <2 x double> [[COL_LOAD66]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[COL_LOAD60]], [[SPLAT_SPLAT69]] -; CHECK-NEXT: [[SPLAT_SPLAT72:%.*]] = shufflevector <2 x double> [[COL_LOAD66]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD63]], <2 x double> [[SPLAT_SPLAT72]], <2 x double> [[TMP23]]) +; CHECK-NEXT: [[VEC_CAST64:%.*]] = bitcast double* [[TMP22]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD65:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST64]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[COL_LOAD59]], [[SPLAT_SPLAT68]] +; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD62]], <2 x double> [[SPLAT_SPLAT71]], <2 x double> [[TMP23]]) ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 6 -; CHECK-NEXT: [[COL_CAST74:%.*]] = bitcast double* [[TMP25]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP24]], <2 x double>* [[COL_CAST74]], align 8 +; CHECK-NEXT: [[VEC_CAST73:%.*]] = bitcast double* [[TMP25]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP24]], <2 x double>* [[VEC_CAST73]], align 8 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST76:%.*]] = bitcast double* [[TMP26]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD77:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST76]], align 8 -; CHECK-NEXT: [[COL_GEP78:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST79:%.*]] = bitcast double* [[COL_GEP78]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD80:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST79]], align 8 +; CHECK-NEXT: [[VEC_CAST75:%.*]] = bitcast double* [[TMP26]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD76:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST75]], align 8 +; CHECK-NEXT: [[VEC_GEP77:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP5]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST78:%.*]] = bitcast double* [[VEC_GEP77]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD79:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST78]], align 8 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP11]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST82:%.*]] = bitcast double* [[TMP27]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD83:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST82]], align 8 -; CHECK-NEXT: [[SPLAT_SPLATINSERT85:%.*]] = shufflevector <2 x double> [[COL_LOAD83]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP28:%.*]] = fmul <1 x double> [[COL_LOAD77]], [[SPLAT_SPLATINSERT85]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT88:%.*]] = shufflevector <2 x double> [[COL_LOAD83]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD80]], <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> [[TMP28]]) +; CHECK-NEXT: [[VEC_CAST81:%.*]] = bitcast double* [[TMP27]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD82:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST81]], align 8 +; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = shufflevector <2 x double> [[COL_LOAD82]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = fmul <1 x double> [[COL_LOAD76]], [[SPLAT_SPLATINSERT84]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = shufflevector <2 x double> [[COL_LOAD82]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD79]], <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> [[TMP28]]) ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 8 -; CHECK-NEXT: [[COL_CAST91:%.*]] = bitcast double* [[TMP30]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP29]], <1 x double>* [[COL_CAST91]], align 8 +; CHECK-NEXT: [[VEC_CAST90:%.*]] = bitcast double* [[TMP30]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP29]], <1 x double>* [[VEC_CAST90]], align 8 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[TMP31:%.*]] = fadd <3 x double> [[COL_LOAD197]], [[COL_LOAD197]] -; CHECK-NEXT: [[TMP32:%.*]] = fadd <3 x double> [[COL_LOAD200]], [[COL_LOAD200]] -; CHECK-NEXT: [[COL_CAST214:%.*]] = bitcast <6 x double>* [[A]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP31]], <3 x double>* [[COL_CAST214]], align 8 -; CHECK-NEXT: [[COL_GEP215:%.*]] = getelementptr <6 x double>, <6 x double>* [[A]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST216:%.*]] = bitcast double* [[COL_GEP215]] to <3 x double>* -; CHECK-NEXT: store <3 x double> [[TMP32]], <3 x double>* [[COL_CAST216]], align 8 +; CHECK-NEXT: [[TMP31:%.*]] = fadd <3 x double> [[COL_LOAD196]], [[COL_LOAD196]] +; CHECK-NEXT: [[TMP32:%.*]] = fadd <3 x double> [[COL_LOAD199]], [[COL_LOAD199]] +; CHECK-NEXT: [[VEC_CAST213:%.*]] = bitcast <6 x double>* [[A]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP31]], <3 x double>* [[VEC_CAST213]], align 8 +; CHECK-NEXT: [[VEC_GEP214:%.*]] = getelementptr <6 x double>, <6 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST215:%.*]] = bitcast double* [[VEC_GEP214]] to <3 x double>* +; CHECK-NEXT: store <3 x double> [[TMP32]], <3 x double>* [[VEC_CAST215]], align 8 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: false: -; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> [[COL_LOAD202]], [[COL_LOAD202]] -; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[COL_LOAD205]], [[COL_LOAD205]] -; CHECK-NEXT: [[TMP35:%.*]] = fadd <2 x double> [[COL_LOAD208]], [[COL_LOAD208]] -; CHECK-NEXT: [[COL_CAST209:%.*]] = bitcast <6 x double>* [[B]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP33]], <2 x double>* [[COL_CAST209]], align 8 -; CHECK-NEXT: [[COL_GEP210:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST211:%.*]] = bitcast double* [[COL_GEP210]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[COL_CAST211]], align 8 -; CHECK-NEXT: [[COL_GEP212:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST213:%.*]] = bitcast double* [[COL_GEP212]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[COL_CAST213]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> [[COL_LOAD201]], [[COL_LOAD201]] +; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[COL_LOAD204]], [[COL_LOAD204]] +; CHECK-NEXT: [[TMP35:%.*]] = fadd <2 x double> [[COL_LOAD207]], [[COL_LOAD207]] +; CHECK-NEXT: [[VEC_CAST208:%.*]] = bitcast <6 x double>* [[B]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP33]], <2 x double>* [[VEC_CAST208]], align 8 +; CHECK-NEXT: [[VEC_GEP209:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST210:%.*]] = bitcast double* [[VEC_GEP209]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[VEC_CAST210]], align 8 +; CHECK-NEXT: [[VEC_GEP211:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST212:%.*]] = bitcast double* [[VEC_GEP211]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[VEC_CAST212]], align 8 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[ST_B92:%.*]] = ptrtoint <9 x double>* [[C]] to i64 -; CHECK-NEXT: [[ST_E93:%.*]] = add nuw nsw i64 [[ST_B92]], 72 -; CHECK-NEXT: [[LD_B97:%.*]] = ptrtoint <6 x double>* [[A]] to i64 -; CHECK-NEXT: [[TMP36:%.*]] = icmp ugt i64 [[ST_E93]], [[LD_B97]] -; CHECK-NEXT: br i1 [[TMP36]], label [[ALIAS_CONT94:%.*]], label [[NO_ALIAS96:%.*]] +; CHECK-NEXT: [[STORE_BEGIN94:%.*]] = ptrtoint <9 x double>* [[C]] to i64 +; CHECK-NEXT: [[STORE_END95:%.*]] = add nuw nsw i64 [[STORE_BEGIN94]], 72 +; CHECK-NEXT: [[LOAD_BEGIN96:%.*]] = ptrtoint <6 x double>* [[A]] to i64 +; CHECK-NEXT: [[TMP36:%.*]] = icmp ugt i64 [[STORE_END95]], [[LOAD_BEGIN96]] +; CHECK-NEXT: br i1 [[TMP36]], label [[ALIAS_CONT91:%.*]], label [[NO_ALIAS93:%.*]] ; CHECK: alias_cont91: -; CHECK-NEXT: [[LD_E98:%.*]] = add nuw nsw i64 [[LD_B97]], 48 -; CHECK-NEXT: [[TMP37:%.*]] = icmp ugt i64 [[LD_E98]], [[ST_B92]] -; CHECK-NEXT: br i1 [[TMP37]], label [[COPY95:%.*]], label [[NO_ALIAS96]] +; CHECK-NEXT: [[LOAD_END97:%.*]] = add nuw nsw i64 [[LOAD_BEGIN96]], 48 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ugt i64 [[LOAD_END97]], [[STORE_BEGIN94]] +; CHECK-NEXT: br i1 [[TMP37]], label [[COPY92:%.*]], label [[NO_ALIAS93]] ; CHECK: copy92: ; CHECK-NEXT: [[TMP38:%.*]] = alloca <6 x double>, align 64 ; CHECK-NEXT: [[TMP39:%.*]] = bitcast <6 x double>* [[TMP38]] to i8* ; CHECK-NEXT: [[TMP40:%.*]] = bitcast <6 x double>* [[A]] to i8* ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 64 dereferenceable(48) [[TMP39]], i8* nonnull align 8 dereferenceable(48) [[TMP40]], i64 48, i1 false) -; CHECK-NEXT: br label [[NO_ALIAS96]] +; CHECK-NEXT: br label [[NO_ALIAS93]] ; CHECK: no_alias93: -; CHECK-NEXT: [[TMP41:%.*]] = phi <6 x double>* [ [[A]], [[END]] ], [ [[A]], [[ALIAS_CONT94]] ], [ [[TMP38]], [[COPY95]] ] -; CHECK-NEXT: [[ST_B99:%.*]] = ptrtoint <9 x double>* [[C]] to i64 -; CHECK-NEXT: [[ST_E100:%.*]] = add nuw nsw i64 [[ST_B99]], 72 -; CHECK-NEXT: [[LD_B104:%.*]] = ptrtoint <6 x double>* [[B]] to i64 -; CHECK-NEXT: [[TMP42:%.*]] = icmp ugt i64 [[ST_E100]], [[LD_B104]] -; CHECK-NEXT: br i1 [[TMP42]], label [[ALIAS_CONT101:%.*]], label [[NO_ALIAS103:%.*]] +; CHECK-NEXT: [[TMP41:%.*]] = phi <6 x double>* [ [[A]], [[END]] ], [ [[A]], [[ALIAS_CONT91]] ], [ [[TMP38]], [[COPY92]] ] +; CHECK-NEXT: [[STORE_BEGIN101:%.*]] = ptrtoint <9 x double>* [[C]] to i64 +; CHECK-NEXT: [[STORE_END102:%.*]] = add nuw nsw i64 [[STORE_BEGIN101]], 72 +; CHECK-NEXT: [[LOAD_BEGIN103:%.*]] = ptrtoint <6 x double>* [[B]] to i64 +; CHECK-NEXT: [[TMP42:%.*]] = icmp ugt i64 [[STORE_END102]], [[LOAD_BEGIN103]] +; CHECK-NEXT: br i1 [[TMP42]], label [[ALIAS_CONT98:%.*]], label [[NO_ALIAS100:%.*]] ; CHECK: alias_cont98: -; CHECK-NEXT: [[LD_E105:%.*]] = add nuw nsw i64 [[LD_B104]], 48 -; CHECK-NEXT: [[TMP43:%.*]] = icmp ugt i64 [[LD_E105]], [[ST_B99]] -; CHECK-NEXT: br i1 [[TMP43]], label [[COPY102:%.*]], label [[NO_ALIAS103]] +; CHECK-NEXT: [[LOAD_END104:%.*]] = add nuw nsw i64 [[LOAD_BEGIN103]], 48 +; CHECK-NEXT: [[TMP43:%.*]] = icmp ugt i64 [[LOAD_END104]], [[STORE_BEGIN101]] +; CHECK-NEXT: br i1 [[TMP43]], label [[COPY99:%.*]], label [[NO_ALIAS100]] ; CHECK: copy99: ; CHECK-NEXT: [[TMP44:%.*]] = alloca <6 x double>, align 64 ; CHECK-NEXT: [[TMP45:%.*]] = bitcast <6 x double>* [[TMP44]] to i8* ; CHECK-NEXT: [[TMP46:%.*]] = bitcast <6 x double>* [[B]] to i8* ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 64 dereferenceable(48) [[TMP45]], i8* nonnull align 8 dereferenceable(48) [[TMP46]], i64 48, i1 false) -; CHECK-NEXT: br label [[NO_ALIAS103]] +; CHECK-NEXT: br label [[NO_ALIAS100]] ; CHECK: no_alias100: -; CHECK-NEXT: [[TMP47:%.*]] = phi <6 x double>* [ [[B]], [[NO_ALIAS96]] ], [ [[B]], [[ALIAS_CONT101]] ], [ [[TMP44]], [[COPY102]] ] -; CHECK-NEXT: [[COL_CAST107:%.*]] = bitcast <6 x double>* [[TMP41]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD108:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST107]], align 8 -; CHECK-NEXT: [[COL_GEP109:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST110:%.*]] = bitcast double* [[COL_GEP109]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD111:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST110]], align 8 -; CHECK-NEXT: [[COL_CAST113:%.*]] = bitcast <6 x double>* [[TMP47]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD114:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST113]], align 8 -; CHECK-NEXT: [[COL_GEP115:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST116:%.*]] = bitcast double* [[COL_GEP115]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD117:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST116]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT120:%.*]] = shufflevector <2 x double> [[COL_LOAD114]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[COL_LOAD108]], [[SPLAT_SPLAT120]] -; CHECK-NEXT: [[SPLAT_SPLAT123:%.*]] = shufflevector <2 x double> [[COL_LOAD114]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD111]], <2 x double> [[SPLAT_SPLAT123]], <2 x double> [[TMP48]]) -; CHECK-NEXT: [[SPLAT_SPLAT126:%.*]] = shufflevector <2 x double> [[COL_LOAD117]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[COL_LOAD108]], [[SPLAT_SPLAT126]] -; CHECK-NEXT: [[SPLAT_SPLAT129:%.*]] = shufflevector <2 x double> [[COL_LOAD117]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD111]], <2 x double> [[SPLAT_SPLAT129]], <2 x double> [[TMP50]]) -; CHECK-NEXT: [[COL_CAST131:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP49]], <2 x double>* [[COL_CAST131]], align 8 -; CHECK-NEXT: [[COL_GEP132:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST133:%.*]] = bitcast double* [[COL_GEP132]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP51]], <2 x double>* [[COL_CAST133]], align 8 +; CHECK-NEXT: [[TMP47:%.*]] = phi <6 x double>* [ [[B]], [[NO_ALIAS93]] ], [ [[B]], [[ALIAS_CONT98]] ], [ [[TMP44]], [[COPY99]] ] +; CHECK-NEXT: [[VEC_CAST106:%.*]] = bitcast <6 x double>* [[TMP41]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD107:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST106]], align 8 +; CHECK-NEXT: [[VEC_GEP108:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST109:%.*]] = bitcast double* [[VEC_GEP108]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD110:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST109]], align 8 +; CHECK-NEXT: [[VEC_CAST112:%.*]] = bitcast <6 x double>* [[TMP47]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD113:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST112]], align 8 +; CHECK-NEXT: [[VEC_GEP114:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST115:%.*]] = bitcast double* [[VEC_GEP114]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD116:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST115]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT119:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT119]] +; CHECK-NEXT: [[SPLAT_SPLAT122:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT122]], <2 x double> [[TMP48]]) +; CHECK-NEXT: [[SPLAT_SPLAT125:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT125]] +; CHECK-NEXT: [[SPLAT_SPLAT128:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT128]], <2 x double> [[TMP50]]) +; CHECK-NEXT: [[VEC_CAST130:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP49]], <2 x double>* [[VEC_CAST130]], align 8 +; CHECK-NEXT: [[VEC_GEP131:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST132:%.*]] = bitcast double* [[VEC_GEP131]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP51]], <2 x double>* [[VEC_CAST132]], align 8 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST135:%.*]] = bitcast double* [[TMP52]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD136:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST135]], align 8 -; CHECK-NEXT: [[COL_GEP137:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST138:%.*]] = bitcast double* [[COL_GEP137]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD139:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST138]], align 8 -; CHECK-NEXT: [[COL_CAST141:%.*]] = bitcast <6 x double>* [[TMP47]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD142:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST141]], align 8 -; CHECK-NEXT: [[COL_GEP143:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST144:%.*]] = bitcast double* [[COL_GEP143]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD145:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST144]], align 8 -; CHECK-NEXT: [[SPLAT_SPLATINSERT147:%.*]] = shufflevector <2 x double> [[COL_LOAD142]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[COL_LOAD136]], [[SPLAT_SPLATINSERT147]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT150:%.*]] = shufflevector <2 x double> [[COL_LOAD142]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD139]], <1 x double> [[SPLAT_SPLATINSERT150]], <1 x double> [[TMP53]]) -; CHECK-NEXT: [[SPLAT_SPLATINSERT153:%.*]] = shufflevector <2 x double> [[COL_LOAD145]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = fmul <1 x double> [[COL_LOAD136]], [[SPLAT_SPLATINSERT153]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT156:%.*]] = shufflevector <2 x double> [[COL_LOAD145]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD139]], <1 x double> [[SPLAT_SPLATINSERT156]], <1 x double> [[TMP55]]) +; CHECK-NEXT: [[VEC_CAST134:%.*]] = bitcast double* [[TMP52]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD135:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST134]], align 8 +; CHECK-NEXT: [[VEC_GEP136:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST137:%.*]] = bitcast double* [[VEC_GEP136]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD138:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST137]], align 8 +; CHECK-NEXT: [[VEC_CAST140:%.*]] = bitcast <6 x double>* [[TMP47]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD141:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST140]], align 8 +; CHECK-NEXT: [[VEC_GEP142:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST143:%.*]] = bitcast double* [[VEC_GEP142]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD144:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST143]], align 8 +; CHECK-NEXT: [[SPLAT_SPLATINSERT146:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT146]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT149:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT149]], <1 x double> [[TMP53]]) +; CHECK-NEXT: [[SPLAT_SPLATINSERT152:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP55:%.*]] = fmul <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT152]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT155:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT155]], <1 x double> [[TMP55]]) ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST159:%.*]] = bitcast double* [[TMP57]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP54]], <1 x double>* [[COL_CAST159]], align 8 -; CHECK-NEXT: [[COL_GEP160:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST161:%.*]] = bitcast double* [[COL_GEP160]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP56]], <1 x double>* [[COL_CAST161]], align 8 -; CHECK-NEXT: [[COL_CAST163:%.*]] = bitcast <6 x double>* [[TMP41]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD164:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST163]], align 8 -; CHECK-NEXT: [[COL_GEP165:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 3 -; CHECK-NEXT: [[COL_CAST166:%.*]] = bitcast double* [[COL_GEP165]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD167:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST166]], align 8 +; CHECK-NEXT: [[VEC_CAST158:%.*]] = bitcast double* [[TMP57]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP54]], <1 x double>* [[VEC_CAST158]], align 8 +; CHECK-NEXT: [[VEC_GEP159:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST160:%.*]] = bitcast double* [[VEC_GEP159]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP56]], <1 x double>* [[VEC_CAST160]], align 8 +; CHECK-NEXT: [[VEC_CAST162:%.*]] = bitcast <6 x double>* [[TMP41]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD163:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST162]], align 8 +; CHECK-NEXT: [[VEC_GEP164:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST165:%.*]] = bitcast double* [[VEC_GEP164]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD166:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST165]], align 8 ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST169:%.*]] = bitcast double* [[TMP58]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD170:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST169]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT173:%.*]] = shufflevector <2 x double> [[COL_LOAD170]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP59:%.*]] = fmul <2 x double> [[COL_LOAD164]], [[SPLAT_SPLAT173]] -; CHECK-NEXT: [[SPLAT_SPLAT176:%.*]] = shufflevector <2 x double> [[COL_LOAD170]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD167]], <2 x double> [[SPLAT_SPLAT176]], <2 x double> [[TMP59]]) +; CHECK-NEXT: [[VEC_CAST168:%.*]] = bitcast double* [[TMP58]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD169:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST168]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT172:%.*]] = shufflevector <2 x double> [[COL_LOAD169]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP59:%.*]] = fmul <2 x double> [[COL_LOAD163]], [[SPLAT_SPLAT172]] +; CHECK-NEXT: [[SPLAT_SPLAT175:%.*]] = shufflevector <2 x double> [[COL_LOAD169]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD166]], <2 x double> [[SPLAT_SPLAT175]], <2 x double> [[TMP59]]) ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 6 -; CHECK-NEXT: [[COL_CAST178:%.*]] = bitcast double* [[TMP61]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP60]], <2 x double>* [[COL_CAST178]], align 8 +; CHECK-NEXT: [[VEC_CAST177:%.*]] = bitcast double* [[TMP61]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP60]], <2 x double>* [[VEC_CAST177]], align 8 ; CHECK-NEXT: [[TMP62:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 2 -; CHECK-NEXT: [[COL_CAST180:%.*]] = bitcast double* [[TMP62]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD181:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST180]], align 8 -; CHECK-NEXT: [[COL_GEP182:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 5 -; CHECK-NEXT: [[COL_CAST183:%.*]] = bitcast double* [[COL_GEP182]] to <1 x double>* -; CHECK-NEXT: [[COL_LOAD184:%.*]] = load <1 x double>, <1 x double>* [[COL_CAST183]], align 8 +; CHECK-NEXT: [[VEC_CAST179:%.*]] = bitcast double* [[TMP62]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD180:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST179]], align 8 +; CHECK-NEXT: [[VEC_GEP181:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP41]], i64 0, i64 5 +; CHECK-NEXT: [[VEC_CAST182:%.*]] = bitcast double* [[VEC_GEP181]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD183:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST182]], align 8 ; CHECK-NEXT: [[TMP63:%.*]] = getelementptr <6 x double>, <6 x double>* [[TMP47]], i64 0, i64 4 -; CHECK-NEXT: [[COL_CAST186:%.*]] = bitcast double* [[TMP63]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD187:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST186]], align 8 -; CHECK-NEXT: [[SPLAT_SPLATINSERT189:%.*]] = shufflevector <2 x double> [[COL_LOAD187]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[COL_LOAD181]], [[SPLAT_SPLATINSERT189]] -; CHECK-NEXT: [[SPLAT_SPLATINSERT192:%.*]] = shufflevector <2 x double> [[COL_LOAD187]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD184]], <1 x double> [[SPLAT_SPLATINSERT192]], <1 x double> [[TMP64]]) +; CHECK-NEXT: [[VEC_CAST185:%.*]] = bitcast double* [[TMP63]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD186:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST185]], align 8 +; CHECK-NEXT: [[SPLAT_SPLATINSERT188:%.*]] = shufflevector <2 x double> [[COL_LOAD186]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[COL_LOAD180]], [[SPLAT_SPLATINSERT188]] +; CHECK-NEXT: [[SPLAT_SPLATINSERT191:%.*]] = shufflevector <2 x double> [[COL_LOAD186]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD183]], <1 x double> [[SPLAT_SPLATINSERT191]], <1 x double> [[TMP64]]) ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 8 -; CHECK-NEXT: [[COL_CAST195:%.*]] = bitcast double* [[TMP66]] to <1 x double>* -; CHECK-NEXT: store <1 x double> [[TMP65]], <1 x double>* [[COL_CAST195]], align 8 +; CHECK-NEXT: [[VEC_CAST194:%.*]] = bitcast double* [[TMP66]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP65]], <1 x double>* [[VEC_CAST194]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll index 3eb1bd40387c..e2d93ca76f58 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll @@ -22,77 +22,77 @@ define void @multiply_all_volatile(<4 x double>* noalias %A, <4 x double>* noali ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP23:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP5]] to <4 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, double* [[TMP2]], i64 [[TMP1]] +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP3]] to <4 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP4]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP6]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP4]], i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[INNER_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP9]], i64 [[TMP8]] -; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP7]], i64 [[TMP6]] +; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP8]] to <4 x double>* +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP11]], i64 2 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP17]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP16]], <2 x i32> -; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> +; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP23]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP22]], <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP0:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[TMP26]], i64 [[TMP25]] -; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP28]] to <2 x double>* -; CHECK-NEXT: store volatile <2 x double> [[TMP17]], <2 x double>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP28]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[TMP24]], i64 [[TMP23]] +; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP25]] to <4 x double>* +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP26]] to <2 x double>* +; CHECK-NEXT: store volatile <2 x double> [[TMP15]], <2 x double>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP26]], i64 2 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>* -; CHECK-NEXT: store volatile <2 x double> [[TMP23]], <2 x double>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store volatile <2 x double> [[TMP21]], <2 x double>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -130,77 +130,77 @@ define void @multiply_load0_volatile(<4 x double>* noalias %A, <4 x double>* noa ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP23:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP5]] to <4 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, double* [[TMP2]], i64 [[TMP1]] +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP3]] to <4 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP4]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP6]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP4]], i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[INNER_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP9]], i64 [[TMP8]] -; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP7]], i64 [[TMP6]] +; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP8]] to <4 x double>* +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP11]], i64 2 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP17]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP16]], <2 x i32> -; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> +; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP23]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP22]], <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP2:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[TMP26]], i64 [[TMP25]] -; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP28]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP28]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[TMP24]], i64 [[TMP23]] +; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP25]] to <4 x double>* +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP26]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP15]], <2 x double>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP26]], i64 2 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP23]], <2 x double>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -237,77 +237,77 @@ define void @multiply_load1_volatile(<4 x double>* noalias %A, <4 x double>* noa ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP23:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP5]] to <4 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, double* [[TMP2]], i64 [[TMP1]] +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP3]] to <4 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP4]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP6]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP4]], i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[INNER_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP9]], i64 [[TMP8]] -; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP7]], i64 [[TMP6]] +; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP8]] to <4 x double>* +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP11]], i64 2 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP17]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP16]], <2 x i32> -; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> +; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP23]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP22]], <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop !3 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP3:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[TMP26]], i64 [[TMP25]] -; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP28]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP28]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[TMP24]], i64 [[TMP23]] +; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP25]] to <4 x double>* +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP26]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP15]], <2 x double>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP26]], i64 2 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP23]], <2 x double>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 @@ -344,77 +344,77 @@ define void @multiply_store_volatile(<4 x double>* noalias %A, <4 x double>* noa ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] ; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP23:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP15:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INNER_IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP5]] to <4 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP6]] to <2 x double>* +; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[A:%.*]] to double* +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, double* [[TMP2]], i64 [[TMP1]] +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP3]] to <4 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double* +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP4]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP6]], i64 2 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP4]], i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[INNER_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP9]], i64 [[TMP8]] -; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP11]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], [[INNER_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double>* [[B:%.*]] to double* +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP7]], i64 [[TMP6]] +; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP8]] to <4 x double>* +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double* +; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 -; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP11]], i64 2 +; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> undef, double [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> undef, double [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP17]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP16]], <2 x i32> -; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> +; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> undef, double [[TMP18]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> undef, double [[TMP20]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP23]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP22]], <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP4:!llvm.loop !.*]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[COLS_IV]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], [[ROWS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[TMP26]], i64 [[TMP25]] -; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* -; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP28]] to <2 x double>* -; CHECK-NEXT: store volatile <2 x double> [[TMP17]], <2 x double>* [[VEC_CAST21]], align 8 -; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP28]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[COLS_IV]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x double>* [[C:%.*]] to double* +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[TMP24]], i64 [[TMP23]] +; CHECK-NEXT: [[COL_CAST20:%.*]] = bitcast double* [[TMP25]] to <4 x double>* +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <4 x double>* [[COL_CAST20]] to double* +; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP26]] to <2 x double>* +; CHECK-NEXT: store volatile <2 x double> [[TMP15]], <2 x double>* [[VEC_CAST21]], align 8 +; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP26]], i64 2 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>* -; CHECK-NEXT: store volatile <2 x double> [[TMP23]], <2 x double>* [[VEC_CAST23]], align 8 +; CHECK-NEXT: store volatile <2 x double> [[TMP21]], <2 x double>* [[VEC_CAST23]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll index edc3034c4917..7816d3007b94 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll @@ -59,11 +59,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP14:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 4 ; CHECK-NEXT: [[COL_CAST15:%.*]] = bitcast double* [[COL_GEP14]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD16:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST15]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD13]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD13]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD13]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD10]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP12]]) -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD10]], <2 x double> [[SPLAT_SPLAT25]], <2 x double> [[TMP14]]) @@ -82,11 +82,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP35:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 6 ; CHECK-NEXT: [[COL_CAST36:%.*]] = bitcast double* [[COL_GEP35]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD37:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST36]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <2 x double> [[COL_LOAD34]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <2 x double> [[COL_LOAD34]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD28]], <2 x double> [[SPLAT_SPLAT41]], <2 x double> [[TMP13]]) ; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <2 x double> [[COL_LOAD34]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD31]], <2 x double> [[SPLAT_SPLAT44]], <2 x double> [[TMP18]]) -; CHECK-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD28]], <2 x double> [[SPLAT_SPLAT48]], <2 x double> [[TMP15]]) ; CHECK-NEXT: [[SPLAT_SPLAT51:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD31]], <2 x double> [[SPLAT_SPLAT51]], <2 x double> [[TMP20]]) @@ -112,11 +112,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP65:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 4 ; CHECK-NEXT: [[COL_CAST66:%.*]] = bitcast double* [[COL_GEP65]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD67:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST66]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <2 x double> [[COL_LOAD64]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <2 x double> [[COL_LOAD64]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[COL_LOAD58]], [[SPLAT_SPLAT70]] ; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <2 x double> [[COL_LOAD64]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD61]], <2 x double> [[SPLAT_SPLAT73]], <2 x double> [[TMP23]]) -; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <2 x double> [[COL_LOAD67]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <2 x double> [[COL_LOAD67]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[COL_LOAD58]], [[SPLAT_SPLAT76]] ; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <2 x double> [[COL_LOAD67]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD61]], <2 x double> [[SPLAT_SPLAT79]], <2 x double> [[TMP25]]) @@ -135,11 +135,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP89:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 6 ; CHECK-NEXT: [[COL_CAST90:%.*]] = bitcast double* [[COL_GEP89]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD91:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST90]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT95:%.*]] = shufflevector <2 x double> [[COL_LOAD88]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT95:%.*]] = shufflevector <2 x double> [[COL_LOAD88]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD82]], <2 x double> [[SPLAT_SPLAT95]], <2 x double> [[TMP24]]) ; CHECK-NEXT: [[SPLAT_SPLAT98:%.*]] = shufflevector <2 x double> [[COL_LOAD88]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD85]], <2 x double> [[SPLAT_SPLAT98]], <2 x double> [[TMP29]]) -; CHECK-NEXT: [[SPLAT_SPLAT102:%.*]] = shufflevector <2 x double> [[COL_LOAD91]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT102:%.*]] = shufflevector <2 x double> [[COL_LOAD91]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD82]], <2 x double> [[SPLAT_SPLAT102]], <2 x double> [[TMP26]]) ; CHECK-NEXT: [[SPLAT_SPLAT105:%.*]] = shufflevector <2 x double> [[COL_LOAD91]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD85]], <2 x double> [[SPLAT_SPLAT105]], <2 x double> [[TMP31]]) @@ -166,11 +166,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP119:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 12 ; CHECK-NEXT: [[COL_CAST120:%.*]] = bitcast double* [[COL_GEP119]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD121:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST120]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT124:%.*]] = shufflevector <2 x double> [[COL_LOAD118]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT124:%.*]] = shufflevector <2 x double> [[COL_LOAD118]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x double> [[COL_LOAD112]], [[SPLAT_SPLAT124]] ; CHECK-NEXT: [[SPLAT_SPLAT127:%.*]] = shufflevector <2 x double> [[COL_LOAD118]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD115]], <2 x double> [[SPLAT_SPLAT127]], <2 x double> [[TMP35]]) -; CHECK-NEXT: [[SPLAT_SPLAT130:%.*]] = shufflevector <2 x double> [[COL_LOAD121]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT130:%.*]] = shufflevector <2 x double> [[COL_LOAD121]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x double> [[COL_LOAD112]], [[SPLAT_SPLAT130]] ; CHECK-NEXT: [[SPLAT_SPLAT133:%.*]] = shufflevector <2 x double> [[COL_LOAD121]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD115]], <2 x double> [[SPLAT_SPLAT133]], <2 x double> [[TMP37]]) @@ -189,11 +189,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP143:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 14 ; CHECK-NEXT: [[COL_CAST144:%.*]] = bitcast double* [[COL_GEP143]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD145:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST144]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT149:%.*]] = shufflevector <2 x double> [[COL_LOAD142]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT149:%.*]] = shufflevector <2 x double> [[COL_LOAD142]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP41:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD136]], <2 x double> [[SPLAT_SPLAT149]], <2 x double> [[TMP36]]) ; CHECK-NEXT: [[SPLAT_SPLAT152:%.*]] = shufflevector <2 x double> [[COL_LOAD142]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD139]], <2 x double> [[SPLAT_SPLAT152]], <2 x double> [[TMP41]]) -; CHECK-NEXT: [[SPLAT_SPLAT156:%.*]] = shufflevector <2 x double> [[COL_LOAD145]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT156:%.*]] = shufflevector <2 x double> [[COL_LOAD145]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP43:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD136]], <2 x double> [[SPLAT_SPLAT156]], <2 x double> [[TMP38]]) ; CHECK-NEXT: [[SPLAT_SPLAT159:%.*]] = shufflevector <2 x double> [[COL_LOAD145]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP44:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD139]], <2 x double> [[SPLAT_SPLAT159]], <2 x double> [[TMP43]]) @@ -221,11 +221,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP173:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 12 ; CHECK-NEXT: [[COL_CAST174:%.*]] = bitcast double* [[COL_GEP173]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD175:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST174]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT178:%.*]] = shufflevector <2 x double> [[COL_LOAD172]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT178:%.*]] = shufflevector <2 x double> [[COL_LOAD172]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[COL_LOAD166]], [[SPLAT_SPLAT178]] ; CHECK-NEXT: [[SPLAT_SPLAT181:%.*]] = shufflevector <2 x double> [[COL_LOAD172]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD169]], <2 x double> [[SPLAT_SPLAT181]], <2 x double> [[TMP48]]) -; CHECK-NEXT: [[SPLAT_SPLAT184:%.*]] = shufflevector <2 x double> [[COL_LOAD175]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT184:%.*]] = shufflevector <2 x double> [[COL_LOAD175]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[COL_LOAD166]], [[SPLAT_SPLAT184]] ; CHECK-NEXT: [[SPLAT_SPLAT187:%.*]] = shufflevector <2 x double> [[COL_LOAD175]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD169]], <2 x double> [[SPLAT_SPLAT187]], <2 x double> [[TMP50]]) @@ -244,11 +244,11 @@ define void @multiply(<16 x double> * %A, <16 x double> * %B, <16 x double>* %C) ; CHECK-NEXT: [[COL_GEP197:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP11]], i64 0, i64 14 ; CHECK-NEXT: [[COL_CAST198:%.*]] = bitcast double* [[COL_GEP197]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD199:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST198]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT203:%.*]] = shufflevector <2 x double> [[COL_LOAD196]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT203:%.*]] = shufflevector <2 x double> [[COL_LOAD196]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP54:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD190]], <2 x double> [[SPLAT_SPLAT203]], <2 x double> [[TMP49]]) ; CHECK-NEXT: [[SPLAT_SPLAT206:%.*]] = shufflevector <2 x double> [[COL_LOAD196]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD193]], <2 x double> [[SPLAT_SPLAT206]], <2 x double> [[TMP54]]) -; CHECK-NEXT: [[SPLAT_SPLAT210:%.*]] = shufflevector <2 x double> [[COL_LOAD199]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT210:%.*]] = shufflevector <2 x double> [[COL_LOAD199]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP56:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD190]], <2 x double> [[SPLAT_SPLAT210]], <2 x double> [[TMP51]]) ; CHECK-NEXT: [[SPLAT_SPLAT213:%.*]] = shufflevector <2 x double> [[COL_LOAD199]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD193]], <2 x double> [[SPLAT_SPLAT213]], <2 x double> [[TMP56]]) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll index 11db4ddfb066..f84499090537 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --verbose +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -lower-matrix-intrinsics -matrix-default-layout=row-major -S < %s | FileCheck --check-prefix=RM %s @@ -13,52 +13,52 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; RM-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> undef, i32 [[TMP2]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], [[BLOCK4]] ; RM-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> ; RM-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> undef, i32 [[TMP7]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT9]], [[BLOCK7]] ; RM-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> undef, i32 [[TMP9]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]] ; RM-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] ; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> ; RM-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> undef, i32 [[TMP14]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT15]], [[BLOCK13]] ; RM-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> undef, i32 [[TMP16]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]] ; RM-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] ; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> ; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> undef, i32 [[TMP21]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT21]], [[BLOCK19]] ; RM-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> undef, i32 [[TMP23]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]] ; RM-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] ; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <2 x i32> @@ -74,7 +74,6 @@ entry: declare <4 x i32> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x i32>, <4 x i32>, i32, i32, i32) define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) { - ; RM-LABEL: @multiply_1x2( ; RM-NEXT: entry: ; RM-NEXT: [[SPLIT:%.*]] = shufflevector <2 x i32> [[A:%.*]], <2 x i32> undef, <1 x i32> zeroinitializer @@ -82,29 +81,29 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) { ; RM-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> undef, <2 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> ; RM-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> undef, i32 [[TMP4]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP5:%.*]] = mul <1 x i32> [[SPLAT_SPLAT5]], [[BLOCK3]] ; RM-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> ; RM-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> undef, i32 [[TMP8]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], [[BLOCK6]] ; RM-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> ; RM-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> undef, i32 [[TMP12]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]] ; RM-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> undef, <2 x i32> ; RM-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> @@ -128,117 +127,117 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> undef, <3 x i32> ; RM-NEXT: [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]] ; RM-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> undef, i32 [[TMP2]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT7]], [[BLOCK5]] ; RM-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> ; RM-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> undef, i32 [[TMP7]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], [[BLOCK8]] ; RM-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> undef, i32 [[TMP9]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT13]], [[BLOCK11]] ; RM-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] ; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> ; RM-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> undef, i32 [[TMP14]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT16]], [[BLOCK14]] ; RM-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> undef, i32 [[TMP16]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]] ; RM-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] ; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> ; RM-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> undef, i32 [[TMP21]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]] ; RM-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> undef, i32 [[TMP23]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]] ; RM-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] ; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> ; RM-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> undef, i32 [[TMP28]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP29:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]] ; RM-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> undef, i32 [[TMP30]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP31:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]] ; RM-NEXT: [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]] ; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> ; RM-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> undef, i32 [[TMP35]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP36:%.*]] = mul <1 x i32> [[SPLAT_SPLAT34]], [[BLOCK32]] ; RM-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> undef, i32 [[TMP37]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP38:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]] ; RM-NEXT: [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]] ; RM-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> ; RM-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> undef, i32 [[TMP42]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP43:%.*]] = mul <1 x i32> [[SPLAT_SPLAT40]], [[BLOCK38]] ; RM-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> undef, i32 [[TMP44]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP45:%.*]] = mul <1 x i32> [[SPLAT_SPLAT43]], [[BLOCK41]] ; RM-NEXT: [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]] ; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> ; RM-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> undef, i32 [[TMP49]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP50:%.*]] = mul <1 x i32> [[SPLAT_SPLAT46]], [[BLOCK44]] ; RM-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> undef, i32 [[TMP51]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP52:%.*]] = mul <1 x i32> [[SPLAT_SPLAT49]], [[BLOCK47]] ; RM-NEXT: [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]] ; RM-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> undef, <3 x i32> ; RM-NEXT: [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> ; RM-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> undef, i32 [[TMP56]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP57:%.*]] = mul <1 x i32> [[SPLAT_SPLAT52]], [[BLOCK50]] ; RM-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> ; RM-NEXT: [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> undef, i32 [[TMP58]], i32 0 -; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> undef, <1 x i32> zeroinitializer +; RM-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0 +; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP59:%.*]] = mul <1 x i32> [[SPLAT_SPLAT55]], [[BLOCK53]] ; RM-NEXT: [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]] ; RM-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> undef, <3 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll index 4da36f9ff59a..cc47b0d23ed9 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll @@ -12,52 +12,52 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> undef, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <1 x i32> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> undef, i32 [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> undef, i32 [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> undef, i32 [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> undef, i32 [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> undef, i32 [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <2 x i32> @@ -80,29 +80,29 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul <1 x i32> [[BLOCK3]], [[SPLAT_SPLAT5]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> undef, i32 [[TMP8]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[BLOCK6]], [[SPLAT_SPLAT8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> undef, i32 [[TMP12]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> @@ -126,117 +126,117 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> undef, <2 x i32> ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> undef, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> undef, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <1 x i32> [[BLOCK8]], [[SPLAT_SPLAT10]] ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> undef, i32 [[TMP9]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> undef, i32 [[TMP14]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[BLOCK14]], [[SPLAT_SPLAT16]] ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> undef, i32 [[TMP16]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> undef, i32 [[TMP21]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[BLOCK20]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> undef, i32 [[TMP23]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> undef, i32 [[TMP28]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = mul <1 x i32> [[BLOCK26]], [[SPLAT_SPLAT28]] ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> undef, i32 [[TMP30]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = mul <1 x i32> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> undef, i32 [[TMP35]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = mul <1 x i32> [[BLOCK32]], [[SPLAT_SPLAT34]] ; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> undef, i32 [[TMP37]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = mul <1 x i32> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> undef, i32 [[TMP42]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP43:%.*]] = mul <1 x i32> [[BLOCK38]], [[SPLAT_SPLAT40]] ; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> undef, i32 [[TMP44]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = mul <1 x i32> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> undef, i32 [[TMP49]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP50:%.*]] = mul <1 x i32> [[BLOCK44]], [[SPLAT_SPLAT46]] ; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> undef, i32 [[TMP51]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = mul <1 x i32> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> undef, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> undef, i32 [[TMP56]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP57:%.*]] = mul <1 x i32> [[BLOCK50]], [[SPLAT_SPLAT52]] ; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> undef, i32 [[TMP58]], i32 0 -; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = mul <1 x i32> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]] ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> undef, <3 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll index 789e48575ded..dece27b26e91 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll @@ -28,7 +28,7 @@ define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, ; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]] ; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[DOTSCALAR8]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]] ; CHECK-NEXT: ret <4 x i32> [[ADD29]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll index 0d99654be528..505f16786b60 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll @@ -28,7 +28,7 @@ define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, ; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]] ; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[DOTSCALAR8]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]] ; CHECK-NEXT: ret <4 x i32> [[ADD29]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll index bcb9a213f6ac..8c6d9d9467ae 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll @@ -164,7 +164,7 @@ define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8 define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; @@ -180,7 +180,7 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; @@ -196,7 +196,7 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; @@ -212,7 +212,7 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> % define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; @@ -228,7 +228,7 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i3 define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> %v1) { ; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <4 x i32> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; @@ -244,7 +244,7 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> %v1) { ; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V1:%.*]] to <4 x i32> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; @@ -292,7 +292,7 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(<16 x i8> %v1) { ; CHECK-LABEL: @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll index b9eb5671b8d1..210d9afce2a5 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll @@ -164,7 +164,7 @@ define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8 define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; @@ -180,7 +180,7 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; @@ -196,7 +196,7 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; @@ -212,7 +212,7 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> % define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i32> %v1) { ; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; @@ -228,7 +228,7 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i3 define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> %v1) { ; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <4 x i32> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; @@ -244,7 +244,7 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> %v1) { ; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V1:%.*]] to <4 x i32> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; @@ -292,7 +292,7 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(<16 x i8> %v1) { ; CHECK-LABEL: @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <8 x i16> -; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 4eb4c5ac1ed5..0308bb97790c 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -29,8 +29,8 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 { ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> undef, double [[A:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -4 ; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 8f00bd8f1e4c..df7c8aab2cd8 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -24,9 +24,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] @@ -41,11 +41,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y:%.*]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SHIFT2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: ret i32 [[X2Y210]] @@ -293,13 +293,13 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <2 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i1> [[TMP8]], [[SHIFT]] ; CHECK-NEXT: [[OR_COND:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0 ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> undef, <2 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP10]], [[SHIFT2]] ; CHECK-NEXT: [[NOT_OR_COND1:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: ret i1 [[NOT_OR_COND1]] diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll index 41751b0651b1..2d3eb6252aa9 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll @@ -19,9 +19,9 @@ define i32 addrspace(1)* @test1(i8 addrspace(1)* %base1, <2 x i64> %offsets) gc ; CHECK-NEXT: [[BASE_I32:%.*]] = bitcast i8 addrspace(1)* [[PHI]] to i32 addrspace(1)* ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[PHI_BASE]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> undef, i32 addrspace(1)* [[BASE_I32]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 ; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[PTR_BASE:%.*]] = extractelement <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]], i32 1, !is_base_value !0 ; CHECK-NEXT: [[PTR:%.*]] = extractelement <2 x i32 addrspace(1)*> [[VEC]], i32 1 @@ -54,9 +54,9 @@ define i32 addrspace(1)* @test2(i8 addrspace(1)* %base, <2 x i64> %offsets) gc " ; CHECK-NEXT: [[BASE_I32:%.*]] = bitcast i8 addrspace(1)* [[BASE:%.*]] to i32 addrspace(1)* ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[BASE]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> undef, i32 addrspace(1)* [[BASE_I32]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 ; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[PTR_BASE:%.*]] = extractelement <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]], i32 1, !is_base_value !0 ; CHECK-NEXT: [[PTR:%.*]] = extractelement <2 x i32 addrspace(1)*> [[VEC]], i32 1 @@ -102,9 +102,9 @@ define i32 addrspace(1)* @test4(i8 addrspace(1)* %base, <2 x i64> %offsets) gc " ; CHECK-NEXT: [[BASE_I32:%.*]] = bitcast i8 addrspace(1)* [[BASE:%.*]] to i32 addrspace(1)* ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[BASE]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> undef, i32 addrspace(1)* [[BASE_I32]], i32 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 ; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x i32 addrspace(1)*> [[VEC]], <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]]) ] ; CHECK-NEXT: [[VEC_RELOCATED:%.*]] = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token [[STATEPOINT_TOKEN]], i32 1, i32 0) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll index 4f2c00259580..d1754c0bbc54 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll @@ -3,7 +3,7 @@ define void @f1(<2 x i16> %x, i16* %a) { ; CHECK-LABEL: @f1( -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 @@ -35,7 +35,7 @@ define void @f2(<2 x i16> %x, i16* %a) { ; CHECK: cont: ; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ] ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 @@ -82,7 +82,7 @@ define void @f3(<2 x i16> %x, i16* %a) { ; CHECK: cont: ; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ] ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll index 2768af342aae..741dbcec392e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll @@ -6,7 +6,7 @@ define void @i64_simplified(i64* noalias %st, i64* noalias %ld) { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 @@ -35,7 +35,7 @@ define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 @@ -64,7 +64,7 @@ define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 25c039a73a96..2e2e5f8452e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -8,7 +8,7 @@ define void @Test(i32) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) @@ -57,7 +57,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]] ; FORCE_REDUCTION: loop: ; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index ba499e29eef8..7fd1cbd8b1ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -8,7 +8,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: br label [[BCI_15:%.*]] ; CHECK: bci_15: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll index 96ca463b5c43..fa3d82201203 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -19,35 +19,35 @@ define void @splat(i8 %a, i8 %b, i8 %c) { ; SSE-NEXT: [[TMP1:%.*]] = xor i8 [[C:%.*]], [[A:%.*]] ; SSE-NEXT: store i8 [[TMP1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16 ; SSE-NEXT: [[TMP2:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1) +; SSE-NEXT: store i8 [[TMP2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1), align 1 ; SSE-NEXT: [[TMP3:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2) +; SSE-NEXT: store i8 [[TMP3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2), align 1 ; SSE-NEXT: [[TMP4:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3) +; SSE-NEXT: store i8 [[TMP4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3), align 1 ; SSE-NEXT: [[TMP5:%.*]] = xor i8 [[C]], [[A]] -; SSE-NEXT: store i8 [[TMP5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4) +; SSE-NEXT: store i8 [[TMP5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4), align 1 ; SSE-NEXT: [[TMP6:%.*]] = xor i8 [[C]], [[B:%.*]] -; SSE-NEXT: store i8 [[TMP6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5) +; SSE-NEXT: store i8 [[TMP6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5), align 1 ; SSE-NEXT: [[TMP7:%.*]] = xor i8 [[C]], [[A]] -; SSE-NEXT: store i8 [[TMP7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6) +; SSE-NEXT: store i8 [[TMP7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6), align 1 ; SSE-NEXT: [[TMP8:%.*]] = xor i8 [[C]], [[B]] -; SSE-NEXT: store i8 [[TMP8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7) +; SSE-NEXT: store i8 [[TMP8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7), align 1 ; SSE-NEXT: [[TMP9:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8) +; SSE-NEXT: store i8 [[TMP9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8), align 1 ; SSE-NEXT: [[TMP10:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9) +; SSE-NEXT: store i8 [[TMP10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9), align 1 ; SSE-NEXT: [[TMP11:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10) +; SSE-NEXT: store i8 [[TMP11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10), align 1 ; SSE-NEXT: [[TMP12:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11) +; SSE-NEXT: store i8 [[TMP12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11), align 1 ; SSE-NEXT: [[TMP13:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12) +; SSE-NEXT: store i8 [[TMP13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12), align 1 ; SSE-NEXT: [[TMP14:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13) +; SSE-NEXT: store i8 [[TMP14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13), align 1 ; SSE-NEXT: [[TMP15:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14) +; SSE-NEXT: store i8 [[TMP15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14), align 1 ; SSE-NEXT: [[TMP16:%.*]] = xor i8 [[A]], [[C]] -; SSE-NEXT: store i8 [[TMP16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15) +; SSE-NEXT: store i8 [[TMP16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15), align 1 ; SSE-NEXT: ret void ; ; AVX-LABEL: @splat( @@ -69,7 +69,7 @@ define void @splat(i8 %a, i8 %b, i8 %c) { ; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15 ; AVX-NEXT: [[TMP17:%.*]] = insertelement <2 x i8> undef, i8 [[A:%.*]], i32 0 ; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.*]], i32 1 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]] ; AVX-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 ; AVX-NEXT: ret void @@ -121,11 +121,11 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; SSE-NEXT: [[TMP1:%.*]] = xor i32 [[ADD1]], [[A]] ; SSE-NEXT: store i32 [[TMP1]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16 ; SSE-NEXT: [[TMP2:%.*]] = xor i32 [[B:%.*]], [[ADD2]] -; SSE-NEXT: store i32 [[TMP2]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1) +; SSE-NEXT: store i32 [[TMP2]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1), align 4 ; SSE-NEXT: [[TMP3:%.*]] = xor i32 [[C]], [[ADD3]] -; SSE-NEXT: store i32 [[TMP3]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2) +; SSE-NEXT: store i32 [[TMP3]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2), align 4 ; SSE-NEXT: [[TMP4:%.*]] = xor i32 [[A]], [[ADD4]] -; SSE-NEXT: store i32 [[TMP4]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3) +; SSE-NEXT: store i32 [[TMP4]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3), align 4 ; SSE-NEXT: ret void ; ; AVX-LABEL: @same_opcode_on_one_side( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll index 45b65f165786..f1ece799f1b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll @@ -12,7 +12,7 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll index a777b98852f9..4a4b7d006888 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll @@ -12,16 +12,16 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i32 3 -; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[SINK:%.*]] +; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[SINK:%.*]], align 16 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll index 5ee85a8d9a86..6d56c587cdc1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll @@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0" define void @fextr(double* %ptr) { ; CHECK-LABEL: @fextr( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef +; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef, align 16 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>* @@ -29,10 +29,10 @@ entry: define void @fextr1(double* %ptr) { ; CHECK-LABEL: @fextr1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef, align 16 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[REORDER_SHUFFLE]], +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[SHUFFLE]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -53,7 +53,7 @@ entry: define void @fextr2(double* %ptr) { ; CHECK-LABEL: @fextr2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, <4 x double>* undef +; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, <4 x double>* undef, align 32 ; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0 ; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll index 8120dc835f07..b9ca4af7d7ea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -18,7 +18,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 003db8629786..57096d447c38 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -126,9 +126,9 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; doesn't matter define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_insert_out_of_order( -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[SHUFFLE1]], <4 x float> [[SHUFFLE2]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 482bfc385025..8403fdb586cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -126,11 +126,11 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; doesn't matter define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_insert_out_of_order( -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[REORDER_SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[REORDER_SHUFFLE1]], <4 x float> [[REORDER_SHUFFLE2]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[SHUFFLE]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[SHUFFLE1]], <4 x float> [[SHUFFLE2]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -184,7 +184,7 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 ; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 -; CHECK-NEXT: call void @v4f32_user(<4 x float> [[RD]]) #0 +; CHECK-NEXT: call void @v4f32_user(<4 x float> [[RD]]) [[ATTR0:#.*]] ; CHECK-NEXT: ret <4 x float> [[RD]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index 0d314fdb7bad..196d539bcd3c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -8,9 +8,9 @@ define i32 @fn1() { ; CHECK-LABEL: @fn1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll index 7856b7bd6b01..820d85eb309d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll @@ -27,8 +27,8 @@ define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[REORDER_SHUFFLE]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 @@ -83,8 +83,8 @@ define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[REORDER_SHUFFLE]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP1]] ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll index 568fd9f3ac79..02716b198187 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -48,11 +48,11 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca ; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 ; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP27:%.*]], <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP26:%.*]], <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP27]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] @@ -109,10 +109,10 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca ; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>* ; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[TMP27]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[TMP26]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ] +; CHECK-NEXT: [[TMP26]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[SHUFFLE]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll index 4887fe6c5f9b..32a187b5240d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -11,15 +11,15 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 @@ -69,16 +69,16 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[REORDER_SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[REORDER_SHUFFLE]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP10]] ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll index a84b1f7e4fcd..d20bd5edc748 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -26,7 +26,7 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> , [[TMP7]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 ; CHECK-NEXT: store float [[TMP9]], float* @g, align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[SHUFFLE]], @@ -49,9 +49,9 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP20:%.*]] = fsub <4 x float> [[TMP10]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[REORDER_SHUFFLE]], <4 x i32>* [[TMP23]], align 4 +; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP23]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll index c84415642130..078a965656c9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -84,7 +84,7 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenc ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[TMP3]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll index aea89375aeae..793ccb5b4444 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -84,7 +84,7 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenc ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[TMP3]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index 0ed88a3c8212..a9857ad4650d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -15,7 +15,7 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[SHR15]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[SHR15]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 undef, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index cb7e774c0645..8ef9d0845a4d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -144,15 +144,15 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[REORDER_SHUFFLE]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 3 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2 @@ -162,10 +162,10 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[SHUFFLE1]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1 +; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[SHUFFLE1]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll index ec51b0b60ad1..e54025ca1b61 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll @@ -25,7 +25,7 @@ define void @foo() { ; ; AVX-LABEL: @foo( ; AVX-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> , i32 8, <2 x i1> , <2 x i32> undef) -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; AVX-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index 12d2355561b7..976c7a847a19 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -226,10 +226,10 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[SHUFFLE]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 4cbc06ae6035..5bba359a843e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -16,14 +16,14 @@ define void @hoge() { ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef -; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE5]], ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll index 62fa52c46ce6..769c752ecb06 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -17,7 +17,7 @@ define i32 @foo(i32 %0, i32* %1, float* %2) { ; CHECK: t37: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ] ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> , [[TMP6]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 0 ; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 1 ; CHECK-NEXT: [[T31:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll index e99864205bfc..ced403ae5375 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll @@ -22,7 +22,7 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[REORDER_SHUFFLE]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll index fd697f1a7ca5..9983578a7058 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll @@ -97,7 +97,7 @@ define void @store_reverse(i64* %p3) { ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>* ; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* [[TMP6]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index 31b73236f5e7..e462a486f58b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -7,7 +7,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A7:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A8:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A1:%.*]], i32 2 @@ -57,7 +57,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A6:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A4:%.*]], i32 2 @@ -111,7 +111,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A4:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A6:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A5:%.*]], i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll index ce33efc951fc..e22a7f2c8cac 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -137,7 +137,7 @@ define void @phi_float32(half %hval, float %fval) { ; MAX256-NEXT: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[HVAL]], i32 2 ; MAX256-NEXT: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[HVAL]], i32 3 ; MAX256-NEXT: [[TMP4:%.*]] = fpext <4 x half> [[TMP3]] to <4 x float> -; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> undef, <8 x i32> +; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> ; MAX256-NEXT: [[TMP5:%.*]] = insertelement <8 x float> undef, float [[FVAL:%.*]], i32 0 ; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[FVAL]], i32 1 ; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[FVAL]], i32 2 @@ -315,7 +315,7 @@ define void @phi_float32(half %hval, float %fval) { ; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[HVAL]], i32 2 ; MAX1024-NEXT: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[HVAL]], i32 3 ; MAX1024-NEXT: [[TMP4:%.*]] = fpext <4 x half> [[TMP3]] to <4 x float> -; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> undef, <32 x i32> +; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <32 x i32> ; MAX1024-NEXT: [[TMP5:%.*]] = insertelement <32 x float> undef, float [[FVAL:%.*]], i32 0 ; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <32 x float> [[TMP5]], float [[FVAL]], i32 1 ; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <32 x float> [[TMP6]], float [[FVAL]], i32 2 diff --git a/llvm/test/Transforms/Scalarizer/vector-gep.ll b/llvm/test/Transforms/Scalarizer/vector-gep.ll index 81566067401c..7a0d387c84bd 100644 --- a/llvm/test/Transforms/Scalarizer/vector-gep.ll +++ b/llvm/test/Transforms/Scalarizer/vector-gep.ll @@ -44,8 +44,8 @@ bb: ;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2 ;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3 ;CHECK: %index = load i16, i16* @index -;CHECK: %.splatinsert = insertelement <4 x i16> undef, i16 %index, i32 0 -;CHECK: %.splat = shufflevector <4 x i16> %.splatinsert, <4 x i16> undef, <4 x i32> zeroinitializer +;CHECK: %.splatinsert = insertelement <4 x i16> poison, i16 %index, i32 0 +;CHECK: %.splat = shufflevector <4 x i16> %.splatinsert, <4 x i16> poison, <4 x i32> zeroinitializer ;CHECK: %.splat[[I0]] = extractelement <4 x i16> %.splat, i32 0 ;CHECK: getelementptr i16, i16* %[[I0]], i16 %.splat[[I0]] ;CHECK: %.splat[[I1]] = extractelement <4 x i16> %.splat, i32 1 @@ -69,8 +69,8 @@ bb: ;CHECK-LABEL: @test3 ;CHECK: %0 = bitcast [4 x i16]* @ptr to i16* -;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0 -;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer +;CHECK: %.splatinsert = insertelement <4 x i16*> poison, i16* %0, i32 0 +;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> poison, <4 x i32> zeroinitializer ;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0 ;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0 ;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1 @@ -91,8 +91,8 @@ bb: ;CHECK-LABEL: @test4 ;CHECK: %0 = load i16*, i16** @ptrptr -;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0 -;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer +;CHECK: %.splatinsert = insertelement <4 x i16*> poison, i16* %0, i32 0 +;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> poison, <4 x i32> zeroinitializer ;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0 ;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0 ;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1 diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll index 51d5f71e5536..5081f2f1fad7 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll @@ -15,7 +15,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> +; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll index 5ff7fe847047..99ce0619ee93 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll @@ -15,7 +15,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> +; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll index c182b35f5539..472c78afaaa1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll @@ -259,7 +259,7 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext0_ext1_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0 ; AVX-NEXT: ret i8 [[R]] @@ -278,7 +278,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext5_ext0_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0 ; AVX-NEXT: ret i8 [[R]] @@ -297,7 +297,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext1_ext6_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1 ; AVX-NEXT: ret i8 [[R]] @@ -310,7 +310,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { define float @ext1_ext0_fmul(<4 x float> %x) { ; CHECK-LABEL: @ext1_ext0_fmul( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; CHECK-NEXT: ret float [[R]] @@ -325,7 +325,7 @@ define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) { ; CHECK-LABEL: @ext0_ext3_fmul_extra_use1( ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 ; CHECK-NEXT: call void @use_f32(float [[E0]]) -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: ret float [[R]] @@ -360,7 +360,7 @@ define float @ext0_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext0_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 ; AVX-NEXT: ret float [[R]] @@ -379,7 +379,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext7_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 ; AVX-NEXT: ret float [[R]] @@ -418,7 +418,7 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) { define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3 @@ -436,7 +436,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext_uses( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 ; CHECK-NEXT: call void @use_f32(float [[A23]]) @@ -453,13 +453,13 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @PR34724( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1 @@ -490,11 +490,11 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_or_reduction_v4i32( ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[Z0123]] @@ -512,9 +512,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] @@ -529,11 +529,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[X2Y210]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll index ac7ca1e8f8e2..abf5ab57efcb 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll @@ -259,7 +259,7 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext0_ext1_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0 ; AVX-NEXT: ret i8 [[R]] @@ -278,7 +278,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext5_ext0_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0 ; AVX-NEXT: ret i8 [[R]] @@ -297,7 +297,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext1_ext6_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1 ; AVX-NEXT: ret i8 [[R]] @@ -310,7 +310,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { define float @ext1_ext0_fmul(<4 x float> %x) { ; CHECK-LABEL: @ext1_ext0_fmul( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; CHECK-NEXT: ret float [[R]] @@ -325,7 +325,7 @@ define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) { ; CHECK-LABEL: @ext0_ext3_fmul_extra_use1( ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 ; CHECK-NEXT: call void @use_f32(float [[E0]]) -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: ret float [[R]] @@ -360,7 +360,7 @@ define float @ext0_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext0_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 ; AVX-NEXT: ret float [[R]] @@ -379,7 +379,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext7_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 ; AVX-NEXT: ret float [[R]] @@ -418,7 +418,7 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) { define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3 @@ -436,7 +436,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext_uses( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 ; CHECK-NEXT: call void @use_f32(float [[A23]]) @@ -453,13 +453,13 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @PR34724( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1 @@ -490,11 +490,11 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_or_reduction_v4i32( ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[Z0123]] @@ -512,9 +512,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] @@ -529,11 +529,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[X2Y210]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll index 211936bb76d4..1a0d77e547f2 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll @@ -13,7 +13,7 @@ define i1 @fcmp_and_v2f64(<2 x double> %a) { ; ; AVX-LABEL: @fcmp_and_v2f64( ; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; AVX-NEXT: ret i1 [[R]] @@ -37,7 +37,7 @@ define i1 @fcmp_or_v4f64(<4 x double> %a) { ; ; AVX-LABEL: @fcmp_or_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; AVX-NEXT: [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 ; AVX-NEXT: ret i1 [[R]] @@ -53,7 +53,7 @@ define i1 @fcmp_or_v4f64(<4 x double> %a) { define i1 @icmp_xor_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @icmp_xor_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: ret i1 [[R]] @@ -79,7 +79,7 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) { ; ; AVX-LABEL: @icmp_add_v8i32( ; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> ; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2 ; AVX-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll index 814649e2a708..ade08844e1f8 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll @@ -109,7 +109,7 @@ define i1 @cmp01_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE-NEXT: ret i1 [[CMP]] ; ; AVX-LABEL: @cmp01_v2f64( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> undef, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> poison, <2 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp oge <2 x double> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; AVX-NEXT: ret i1 [[CMP]] @@ -128,7 +128,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE-NEXT: ret i1 [[CMP]] ; ; AVX-LABEL: @cmp10_v2f64( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 ; AVX-NEXT: ret i1 [[CMP]] @@ -141,7 +141,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) { define i1 @cmp12_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @cmp12_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[SHIFT]] ; CHECK-NEXT: [[CMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: ret i1 [[CMP]] @@ -161,7 +161,7 @@ define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) { ; SSE-NEXT: ret <4 x i1> [[R]] ; ; AVX-LABEL: @ins_fcmp_ext_ext( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]] ; AVX-NEXT: [[A21:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2 @@ -176,7 +176,7 @@ define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) { define <4 x i1> @ins_icmp_ext_ext(<4 x i32> %a, <4 x i1> %b) { ; CHECK-LABEL: @ins_icmp_ext_ext( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[SHIFT]], [[A]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A23]], i32 3 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index dd0ef45c7f02..561ba227b654 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -176,7 +176,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -187,7 +187,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %b = bitcast <4 x float>* %p to float* @@ -202,7 +202,7 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -216,7 +216,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl ; CHECK-LABEL: @casted_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %b = bitcast <16 x i8>* %p to i32* @@ -230,7 +230,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0 @@ -244,7 +244,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenc define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(44)* %p, i64 0, i64 0 @@ -260,7 +260,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -280,7 +280,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -300,7 +300,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 2 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -332,7 +332,7 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12 @@ -368,7 +368,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 @@ -472,7 +472,7 @@ define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(1 ; CHECK-LABEL: @load_f32_insert_v4f32_align( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -497,7 +497,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -508,7 +508,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %b = bitcast <4 x i32>* %p to i32* @@ -521,7 +521,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) ; CHECK-LABEL: @load_f32_insert_v16f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -533,7 +533,7 @@ define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p ; CHECK-LABEL: @load_f32_insert_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -588,7 +588,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 derefe ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <2 x float>, <2 x float>* %p, align 4 @@ -601,7 +601,7 @@ define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 derefe ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <8 x float>, <8 x float>* %p, align 4 @@ -638,7 +638,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 derefe ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; AVX2-NEXT: [[TMP1:%.*]] = bitcast <2 x i16>* [[P:%.*]] to <8 x i16>* ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 1665c5dec4de..3bf8492aeac1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -176,7 +176,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -187,7 +187,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %b = bitcast <4 x float>* %p to float* @@ -202,7 +202,7 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -216,7 +216,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl ; CHECK-LABEL: @casted_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %b = bitcast <16 x i8>* %p to i32* @@ -230,7 +230,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0 @@ -244,7 +244,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenc define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(44)* %p, i64 0, i64 0 @@ -260,7 +260,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -280,7 +280,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -300,7 +300,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 2 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 @@ -332,7 +332,7 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12 @@ -368,7 +368,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 @@ -472,7 +472,7 @@ define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(1 ; CHECK-LABEL: @load_f32_insert_v4f32_align( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -497,7 +497,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -508,7 +508,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %b = bitcast <4 x i32>* %p to i32* @@ -521,7 +521,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) ; CHECK-LABEL: @load_f32_insert_v16f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -533,7 +533,7 @@ define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p ; CHECK-LABEL: @load_f32_insert_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -588,7 +588,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 derefe ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <2 x float>, <2 x float>* %p, align 4 @@ -601,7 +601,7 @@ define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 derefe ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <8 x float>, <8 x float>* %p, align 4 @@ -638,7 +638,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 derefe ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; AVX2-NEXT: [[TMP1:%.*]] = bitcast <2 x i16>* [[P:%.*]] to <8 x i16>* ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll index 4dbea1d5e705..c779e72984bf 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll @@ -12,7 +12,7 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { ; ; AVX-LABEL: @bitcast_shuf_narrow_element( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> -; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: ret <16 x i8> [[R]] ; %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> @@ -25,7 +25,7 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ; CHECK-LABEL: @bitcast_shuf_same_size( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> @@ -64,7 +64,7 @@ define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { ; CHECK-LABEL: @bitcast_shuf_wide_element( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %shuf = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> @@ -103,7 +103,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; AVX-LABEL: @PR35454_1( ; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8> -; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> @@ -134,7 +134,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; AVX-LABEL: @PR35454_2( ; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16> -; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 1608cc0bc3ef..c779e72984bf 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -6,16 +6,16 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { ; SSE-LABEL: @bitcast_shuf_narrow_element( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; SSE-NEXT: ret <16 x i8> [[R]] ; ; AVX-LABEL: @bitcast_shuf_narrow_element( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> -; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: ret <16 x i8> [[R]] ; - %shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> + %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8> ret <16 x i8> %r } @@ -25,10 +25,10 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ; CHECK-LABEL: @bitcast_shuf_same_size( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> + %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <4 x float> ret <4 x float> %r } @@ -37,11 +37,11 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { define <16 x i8> @bitcast_shuf_narrow_element_wrong_size(<2 x i32> %v) { ; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_size( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; CHECK-NEXT: ret <16 x i8> [[R]] ; - %shuf = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> + %shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8> ret <16 x i8> %r } @@ -50,11 +50,11 @@ define <16 x i8> @bitcast_shuf_narrow_element_wrong_size(<2 x i32> %v) { define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { ; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_type( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to i128 ; CHECK-NEXT: ret i128 [[R]] ; - %shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> + %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to i128 ret i128 %r } @@ -64,10 +64,10 @@ define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { ; CHECK-LABEL: @bitcast_shuf_wide_element( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %shuf = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> + %shuf = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> %r = bitcast <8 x i16> %shuf to <4 x i32> ret <4 x i32> %r } @@ -78,12 +78,12 @@ declare void @use(<4 x i32>) define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { ; CHECK-LABEL: @bitcast_shuf_uses( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i32> [[SHUF]]) ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; CHECK-NEXT: ret <16 x i8> [[R]] ; - %shuf = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> + %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> call void @use(<4 x i32> %shuf) %r = bitcast <4 x i32> %shuf to <16 x i8> ret <16 x i8> %r @@ -92,30 +92,30 @@ define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-LABEL: @PR35454_1( ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> ; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; SSE-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> -; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> undef, <4 x i32> +; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_1( ; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8> -; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> -; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[BC3]] ; %bc = bitcast <2 x i64> %v to <4 x i32> - %permil = shufflevector <4 x i32> %bc, <4 x i32> undef, <4 x i32> + %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> %bc1 = bitcast <4 x i32> %permil to <16 x i8> %add = shl <16 x i8> %bc1, %bc2 = bitcast <16 x i8> %add to <4 x i32> - %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> undef, <4 x i32> + %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> %bc3 = bitcast <4 x i32> %permil1 to <2 x i64> ret <2 x i64> %bc3 } @@ -123,30 +123,30 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-LABEL: @PR35454_2( ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> ; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; SSE-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> -; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> undef, <4 x i32> +; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_2( ; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16> -; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> -; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[BC3]] ; %bc = bitcast <2 x i64> %v to <4 x i32> - %permil = shufflevector <4 x i32> %bc, <4 x i32> undef, <4 x i32> + %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> %bc1 = bitcast <4 x i32> %permil to <8 x i16> %add = shl <8 x i16> %bc1, %bc2 = bitcast <8 x i16> %add to <4 x i32> - %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> undef, <4 x i32> + %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> %bc3 = bitcast <4 x i32> %permil1 to <2 x i64> ret <2 x i64> %bc3 } diff --git a/polly/test/Isl/CodeGen/invariant_load_hoist_alignment.ll b/polly/test/Isl/CodeGen/invariant_load_hoist_alignment.ll index 52ea0618b9ea..59f92cbc361f 100644 --- a/polly/test/Isl/CodeGen/invariant_load_hoist_alignment.ll +++ b/polly/test/Isl/CodeGen/invariant_load_hoist_alignment.ll @@ -17,7 +17,7 @@ body: %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ] %scevgep = getelementptr [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvar ; CHECK: [[T2:%.load]] = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i32 0, i32 0), align 4 -; CHECK: %value_p.splatinsert = insertelement <4 x i32> undef, i32 [[T2]], i32 0 +; CHECK: %value_p.splatinsert = insertelement <4 x i32> poison, i32 [[T2]], i32 0 %value = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i64 0, i64 0), align 4 %result = tail call i32 @foo(i32 %value) nounwind store i32 %result, i32* %scevgep, align 4 diff --git a/polly/test/Isl/CodeGen/simple_vec_cast.ll b/polly/test/Isl/CodeGen/simple_vec_cast.ll index 8991a07b4920..c3ee183fcedb 100644 --- a/polly/test/Isl/CodeGen/simple_vec_cast.ll +++ b/polly/test/Isl/CodeGen/simple_vec_cast.ll @@ -32,7 +32,7 @@ bb4: ; preds = %bb1 ; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) ; CHECK: polly.stmt.bb2: ; preds = %polly.start -; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 -; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: %tmp_p.splatinsert = insertelement <4 x float> poison, float %.load, i32 0 +; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer ; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double> ; CHECK: store <4 x double> %0, <4 x double>* diff --git a/polly/test/Isl/CodeGen/simple_vec_const.ll b/polly/test/Isl/CodeGen/simple_vec_const.ll index 62acccbdca4c..f3641c1f3931 100644 --- a/polly/test/Isl/CodeGen/simple_vec_const.ll +++ b/polly/test/Isl/CodeGen/simple_vec_const.ll @@ -56,5 +56,5 @@ define i32 @main() nounwind { ; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) ; CHECK: polly.stmt.: ; preds = %polly.start -; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 -; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: %_p.splatinsert = insertelement <4 x float> poison, float %.load, i32 0 +; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer diff --git a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll index 13cc9aad4b85..b645ece1ceff 100644 --- a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll +++ b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll @@ -26,6 +26,6 @@ return: ; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0) ; CHECK-NOT: load <1 x float**> -; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0 -; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer +; CHECK: %value_p.splatinsert = insertelement <4 x float**> poison, float** %.load, i32 0 +; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> poison, <4 x i32> zeroinitializer ; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8