[InstCombine] add tests for vector-select-of-binops with 2 variables; NFC

llvm-svn: 335778
2018-06-27 20:23:47 +00:00 · 2018-06-27 20:23:47 +00:00 · 1ef49be8b6
parent 600a2df289
commit 1ef49be8b6
1 changed files with 263 additions and 0 deletions
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@ -239,6 +239,269 @@ define <4 x double> @frem(<4 x double> %v0) {
  ret <4 x double> %t3
 }

+define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @add_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[T2:%.*]] = add <4 x i32> [[V1:%.*]], <i32 undef, i32 6, i32 undef, i32 8>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t3
+}
+
+; Constant operand 0 (LHS) also works.
+
+define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sub_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 undef>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = sub <4 x i32> <i32 undef, i32 undef, i32 undef, i32 8>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t3
+}
+
+; If any element of the shuffle mask operand is undef, that element of the result is undef.
+; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
+
+define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V1:%.*]], <i32 undef, i32 6, i32 undef, i32 8>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t3
+}
+
+; Preserve flags when possible.
+
+define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T2:%.*]] = shl nsw <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+  ret <4 x i32> %t3
+}
+
+; Can't propagate the flag here.
+
+define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @lshr_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t3
+}
+
+; Try weird types.
+
+define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
+; CHECK-LABEL: @ashr_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[T2:%.*]] = ashr <3 x i32> [[V1:%.*]], <i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <3 x i32> [[T1]], <3 x i32> [[T2]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i32> [[T3]]
+;
+  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
+  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
+  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
+  ret <3 x i32> %t3
+}
+
+define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
+; CHECK-LABEL: @and_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 undef, i42 undef>
+; CHECK-NEXT:    [[T2:%.*]] = and <3 x i42> [[V1:%.*]], <i42 undef, i42 5, i42 undef>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <3 x i42> [[T1]], <3 x i42> [[T2]], <3 x i32> <i32 0, i32 4, i32 undef>
+; CHECK-NEXT:    ret <3 x i42> [[T3]]
+;
+  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
+  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
+  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
+  ret <3 x i42> %t3
+}
+
+; It doesn't matter if only one intermediate op has extra uses.
+
+define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @or_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T2:%.*]] = or <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 undef, i32 undef>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  call void @use_v4i32(<4 x i32> %t1)
+  ret <4 x i32> %t3
+}
+
+define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @xor_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 undef, i32 3, i32 4>
+; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  call void @use_v4i32(<4 x i32> %t2)
+  ret <4 x i32> %t3
+}
+
+; Div/rem need special handling if the shuffle has undef elements.
+
+define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @udiv_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+  call void @use_v4i32(<4 x i32> %t1)
+  call void @use_v4i32(<4 x i32> %t2)
+  ret <4 x i32> %t3
+}
+
+define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sdiv_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T2:%.*]] = sdiv <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x i32> %t3
+}
+
+define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @urem_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x i32> %t3
+}
+
+define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @srem_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
+  ret <4 x i32> %t3
+}
+
+; Try FP ops/types.
+
+define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
+; CHECK-LABEL: @fadd_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:    [[T2:%.*]] = fadd <4 x float> [[V1:%.*]], <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[T3]]
+;
+  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %t3
+}
+
+define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @fsub_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = fsub <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fsub <4 x double> <double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x double> [[T3]]
+;
+  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
+  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+  ret <4 x double> %t3
+}
+
+; Intersect any FMF.
+
+define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
+; CHECK-LABEL: @fmul_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc nsz <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc nsz <4 x float> [[V1:%.*]], <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[T3]]
+;
+  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %t3
+}
+
+define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @frem_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = frem nnan ninf <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = frem nnan arcp <4 x double> <double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V1:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x double> [[T3]]
+;
+  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
+  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+  ret <4 x double> %t3
+}
+
+; The variable operand must be either the first operand or second operand in both binops.
+
+define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @fdiv_2_vars(
+; CHECK-NEXT:    [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V0:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x double> [[T3]]
+;
+  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
+  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x double> %t3
+}
+
 ; FIXME:
 ; Shift-left with constant shift amount can be converted to mul to enable the fold.