From 5bd36644c815ab7c661dd6e21ec785f2f6776ea0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 9 Jul 2018 13:21:46 +0000 Subject: [PATCH] [InstCombine] fix shuffle-of-binops transform to avoid poison/undef As noted in D48987, there are many different ways for this transform to go wrong. In particular, the poison potential for shifts means we have to more careful with those ops. I added tests to make that behavior visible for all of the different cases that I could find. This is a partial fix. To make this review easier, I did not make changes for the single binop pattern (handled in foldSelectShuffleWith1Binop()). I also left out some potential optimizations noted with TODO comments. I'll follow-up once we're confident that things are correct here. The goal is to correct all marked FIXME tests to either avoid the shuffle transform or do it safely. Note that distinguishing when the shuffle mask contains undefs and using getBinOpIdentity() allows for some improvements to div/rem patterns, so there are wins along with the missed opportunities and fixes. Differential Revision: https://reviews.llvm.org/D49047 llvm-svn: 336546 --- .../InstCombine/InstCombineVectorOps.cpp | 73 ++++++++---- .../Transforms/InstCombine/shuffle_select.ll | 105 ++++++++++-------- 2 files changed, 110 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e3b935c93356..9b0746f24664 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1280,52 +1280,83 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, // The opcodes must be the same. Use a new name to make that clear. BinaryOperator::BinaryOps BOpc = Opc0; + // We are moving a binop after a shuffle. When a shuffle has an undefined + // mask element, the result is undefined, but it is not poison or undefined + // behavior. That is not necessarily true for div/rem/shift. + Constant *Mask = Shuf.getMask(); + bool MightCreatePoisonOrUB = + Mask->containsUndefElement() && + (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); + + Constant *NewC; Value *V; if (X == Y) { + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + + // The new binop constant must not have any potential for extra poison/UB. + if (MightCreatePoisonOrUB) { + // TODO: Use getBinOpAbsorber for LHS replacement constants? + if (!ConstantsAreOp1) + return nullptr; + + Type *EltTy = Shuf.getType()->getVectorElementType(); + auto *IdC = ConstantExpr::getBinOpIdentity(BOpc, EltTy, true); + if (!IdC) + return nullptr; + + // Replace undef elements caused by the mask with identity constants. + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + unsigned NumElts = Shuf.getType()->getVectorNumElements(); + SmallVector VectorOfNewC(NumElts); + for (unsigned i = 0; i != NumElts; i++) { + if (isa(Mask->getAggregateElement(i))) + VectorOfNewC[i] = IdC; + else + VectorOfNewC[i] = NewC->getAggregateElement(i); + } + NewC = ConstantVector::get(VectorOfNewC); + } + // Remove a binop and the shuffle by rearranging the constant: // shuffle (op V, C0), (op V, C1), M --> op V, C' // shuffle (op C0, V), (op C1, V), M --> op C', V V = X; - } else if (!Instruction::isIntDivRem(BOpc) && - (B0->hasOneUse() || B1->hasOneUse())) { + } else { // If there are 2 different variable operands, we must create a new shuffle // (select) first, so check uses to ensure that we don't end up with more // instructions than we started with. - // + if (!B0->hasOneUse() && !B1->hasOneUse()) + return nullptr; + + // Bail out if we can not guarantee safety of the transform. + // TODO: We could try harder by replacing the undef mask elements. + if (MightCreatePoisonOrUB) + return nullptr; + // Note: In general, we do not create new shuffles in InstCombine because we // do not know if a target can lower an arbitrary shuffle optimally. In this // case, the shuffle uses the existing mask, so there is no additional risk. - // - // TODO: We are disallowing div/rem because a shuffle with an undef mask - // element would propagate an undef value to the div/rem. That's not - // safe in general because div/rem allow for undefined behavior. We can - // loosen this restriction (eg, check if the mask has no undefs or replace - // undef elements). // Select the variable vectors first, then perform the binop: // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) - V = Builder.CreateShuffleVector(X, Y, Shuf.getMask()); - } else { - return nullptr; + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + V = Builder.CreateShuffleVector(X, Y, Mask); } - Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Shuf.getMask()); - - // If the shuffle mask contains undef elements, then the new constant - // vector will have undefs in those lanes. This could cause the entire - // binop to be undef. - if (Instruction::isIntDivRem(BOpc)) - NewC = getSafeVectorConstantForIntDivRem(NewC); - Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) : BinaryOperator::Create(BOpc, NewC, V); - // Flags are intersected from the 2 source binops. + // Flags are intersected from the 2 source binops. But there are 2 exceptions: + // 1. If we changed an opcode, poison conditions might have changed. + // 2. If the shuffle had undef mask elements, the new binop might have undefs + // where the original code did not. Drop all poison potential to be safe. NewBO->copyIRFlags(B0); NewBO->andIRFlags(B1); if (DropNSW) NewBO->setHasNoSignedWrap(false); + if (Mask->containsUndefElement()) + NewBO->dropPoisonGeneratingFlags(); return NewBO; } diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 1c03bcf7c5dd..f997817bde66 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -451,11 +451,11 @@ define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) { ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @add_add_nsw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = add nsw <4 x i32> %v0, @@ -499,11 +499,11 @@ define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) { ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = sub nuw <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sub nuw <4 x i32> , %v0 @@ -550,11 +550,11 @@ define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) { ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @shl_shl_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl <4 x i32> %v0, @@ -563,11 +563,12 @@ define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) { ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: We could have propagated 'nuw' here. define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nuw <4 x i32> %v0, @@ -695,9 +696,11 @@ define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) { ret <4 x i32> %t3 } +; TODO: 'exact' could have propagated. + define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv exact <4 x i32> %v0, @@ -717,9 +720,13 @@ define <4 x i32> @urem_urem(<4 x i32> %v0) { ret <4 x i32> %t3 } +; TODO: This could be folded by using a safe constant. + define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @urem_urem_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> , [[V0]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = urem <4 x i32> , %v0 @@ -739,9 +746,13 @@ define <4 x i32> @srem_srem(<4 x i32> %v0) { ret <4 x i32> %t3 } +; TODO: This could be folded by using a safe constant. + define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @srem_srem_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V0]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = srem <4 x i32> , %v0 @@ -863,12 +874,12 @@ define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = sub nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sub nsw <4 x i32> , %v0 @@ -916,12 +927,12 @@ define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul nuw <4 x i32> %v0, @@ -956,12 +967,13 @@ define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V1:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl <4 x i32> %v0, @@ -970,12 +982,13 @@ define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T2:%.*]] = shl nsw <4 x i32> [[V1:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nsw <4 x i32> %v0, @@ -1010,13 +1023,13 @@ define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: This is not safe for the same reason that div/rem with undef mask is not safe. -; Analysis could determine that the shift has an undef shift amount creating poison. +; TODO: This could be transformed by using a safe constant. define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V1:%.*]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr <4 x i32> , %v0 @@ -1025,14 +1038,13 @@ define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } -; FIXME: This is not safe for the same reason that div/rem with undef mask is not safe. -; Analysis could determine that the shift has an undef shift amount creating poison. -; The 'exact' could also result in poison by choosing a -1 shift value. +; TODO: This could be transformed by using a safe constant. define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> , [[V1:%.*]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr exact <4 x i32> , %v0 @@ -1107,9 +1119,8 @@ define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars( -; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv <4 x i32> , %v0 @@ -1120,9 +1131,8 @@ define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_exact( -; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv exact <4 x i32> , %v0 @@ -1131,6 +1141,8 @@ define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } +; TODO: This could be transformed using a safe constant. + define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] @@ -1144,6 +1156,8 @@ define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %t3 } +; TODO: This could be transformed using a safe constant. + define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] @@ -1157,13 +1171,12 @@ define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ret <4 x i32> %t3 } -; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +; If the shuffle has no undefs, it's safe to shuffle the variables first. define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars( -; CHECK-NEXT: [[T1:%.*]] = sdiv <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = sdiv <4 x i32> [[V1:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv <4 x i32> %v0, @@ -1174,9 +1187,8 @@ define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_exact( -; CHECK-NEXT: [[T1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = sdiv exact <4 x i32> [[V1:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv exact <4 x i32> %v0, @@ -1211,13 +1223,12 @@ define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ret <4 x i32> %t3 } -; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +; If the shuffle has no undefs, it's safe to shuffle the variables first. define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @urem_2_vars( -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = urem <4 x i32> , %v0