From f7499011ca29bebeda7c9d79d79b290cf0b8b46d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 13 Nov 2019 13:26:13 +0000 Subject: [PATCH] [InstCombine] Avoid moving ops that do restrict undef across shuffles. I think we have to be a bit more careful when it comes to moving ops across shuffles, if the op does restrict undef. For example, without this patch, we would move 'and %v, <0, 0, -1, -1>' over a 'shufflevector %a, undef, '. As a result, the first 2 lanes of the result are undef after the combine, but they really should be 0, unless I am missing something. For ops that do fold to undef on undef operands, the current behavior should be fine. I've add conservative check OpDoesRestrictUndef, maybe there's a better existing utility? Reviewers: spatel, RKSimon, lebedev.ri Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D70093 --- .../InstCombine/InstructionCombining.cpp | 4 +- .../Transforms/InstCombine/vec_shuffle.ll | 45 +++++++++++++------ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7656d669e8a2..d9675e7c1e03 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1543,11 +1543,13 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { // If this is a widening shuffle, we must be able to extend with undef // elements. If the original binop does not produce an undef in the high // lanes, then this transform is not safe. + // Similarly for undef lanes due to the shuffle mask, we can only + // transform binops that preserve undef. // TODO: We could shuffle those non-undef constant values into the // result by using a constant vector (rather than an undef vector) // as operand 1 of the new binop, but that might be too aggressive // for target-independent shuffle creation. - if (I >= SrcVecNumElts) { + if (I >= SrcVecNumElts || ShMask[I] < 0) { Constant *MaybeUndef = ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt) : ConstantExpr::get(Opcode, CElt, UndefScalar); diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 39bbfb2e5f58..07c888ac6ae0 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1004,10 +1004,13 @@ define <2 x i32> @and_splat_constant(<2 x i32> %x) { ret <2 x i32> %r } +; AND does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1016,10 +1019,13 @@ entry: ret <4 x i16> %and } +; AND does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1028,10 +1034,13 @@ entry: ret <4 x i16> %and } +; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes. define <4 x i16> @and_constant_mask_undef_3(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i16> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], +; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> @@ -1039,11 +1048,12 @@ entry: ret <4 x i16> %and } +; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes. define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1052,7 +1062,6 @@ entry: ret <4 x i16> %and } - define <4 x i16> @and_constant_mask_not_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_not_undef( ; CHECK-NEXT: entry: @@ -1066,10 +1075,13 @@ entry: ret <4 x i16> %and } +; OR does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1078,10 +1090,13 @@ entry: ret <4 x i16> %or } +; OR does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1090,10 +1105,13 @@ entry: ret <4 x i16> %or } +; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes. define <4 x i16> @or_constant_mask_undef_3(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i16> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], +; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> @@ -1101,11 +1119,12 @@ entry: ret <4 x i16> %or } +; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes. define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1130,8 +1149,8 @@ entry: define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @shl_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[SHL]] ; entry: