forked from OSchip/llvm-project
[InstCombine] move shuffle after min/max with same-shuffled operands
This is an intrinsic version of the existing fold for binops. As a first step, I only allowed min/max, but the code is set up to make adding more intrinsics easy (with more or fewer than 2 arguments). This change (and possible follow-ups) is discussed in issue #46238.
This commit is contained in:
parent
a47e03685b
commit
432c199e84
|
@ -1080,6 +1080,43 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
|
|||
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
|
||||
}
|
||||
|
||||
/// If all arguments of the intrinsic are unary shuffles with the same mask,
|
||||
/// try to shuffle after the intrinsic.
|
||||
static Instruction *
|
||||
foldShuffledIntrinsicOperands(IntrinsicInst *II,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
|
||||
// etc. Use llvm::isTriviallyVectorizable() and related to determine
|
||||
// which intrinsics are safe to shuffle?
|
||||
if (!match(II, m_MaxOrMin(m_Value(), m_Value())))
|
||||
return nullptr;
|
||||
|
||||
Value *X;
|
||||
ArrayRef<int> Mask;
|
||||
if (!match(II->getArgOperand(0),
|
||||
m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
|
||||
return nullptr;
|
||||
|
||||
// At least 1 operand must have 1 use because we are creating 2 instructions.
|
||||
if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
|
||||
return nullptr;
|
||||
|
||||
// See if all arguments are shuffled with the same mask.
|
||||
SmallVector<Value *, 4> NewArgs(II->arg_size());
|
||||
NewArgs[0] = X;
|
||||
for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
|
||||
if (!match(II->getArgOperand(i),
|
||||
m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))))
|
||||
return nullptr;
|
||||
NewArgs[i] = X;
|
||||
}
|
||||
|
||||
// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
|
||||
Value *NewIntrinsic =
|
||||
Builder.CreateIntrinsic(II->getIntrinsicID(), X->getType(), NewArgs);
|
||||
return new ShuffleVectorInst(NewIntrinsic, Mask);
|
||||
}
|
||||
|
||||
/// CallInst simplification. This mostly only handles folding of intrinsic
|
||||
/// instructions. For normal calls, it allows visitCallBase to do the heavy
|
||||
/// lifting.
|
||||
|
@ -2622,6 +2659,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
|
||||
return Shuf;
|
||||
|
||||
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
|
||||
// context, so it is handled in visitCallBase and we should trigger it.
|
||||
return visitCallBase(*II);
|
||||
|
|
|
@ -2364,9 +2364,8 @@ define i8 @umax_umax_reassoc_constantexpr_sink(i8 %x, i8 %y) {
|
|||
|
||||
define <3 x i8> @smax_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @smax_unary_shuffle_ops(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
|
||||
; CHECK-NEXT: [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
|
||||
; CHECK-NEXT: ret <3 x i8> [[R]]
|
||||
;
|
||||
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
|
||||
|
@ -2379,8 +2378,8 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
|
|||
; CHECK-LABEL: @smin_unary_shuffle_ops_use_poison_mask_elt(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
|
||||
; CHECK-NEXT: call void @use_vec(<3 x i8> [[SX]])
|
||||
; CHECK-NEXT: [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X]], <3 x i8> [[Y:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
|
||||
; CHECK-NEXT: ret <3 x i8> [[R]]
|
||||
;
|
||||
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 poison, i32 0, i32 2>
|
||||
|
@ -2392,10 +2391,10 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
|
|||
|
||||
define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
|
||||
; CHECK-LABEL: @umax_unary_shuffle_ops_use_widening(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[SY:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
|
||||
; CHECK-NEXT: call void @use_vec(<3 x i8> [[SY]])
|
||||
; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
|
||||
; CHECK-NEXT: ret <3 x i8> [[R]]
|
||||
;
|
||||
%sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
|
||||
|
@ -2407,9 +2406,8 @@ define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
|
|||
|
||||
define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @umin_unary_shuffle_ops_narrowing(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
|
||||
; CHECK-NEXT: [[SY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
|
||||
; CHECK-NEXT: ret <3 x i8> [[R]]
|
||||
;
|
||||
%sx = shufflevector <4 x i8> %x, <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
|
||||
|
@ -2418,6 +2416,8 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
|
|||
ret <3 x i8> %r
|
||||
}
|
||||
|
||||
; negative test - must have 2 shuffles
|
||||
|
||||
define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @smax_unary_shuffle_ops_unshuffled_op(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
|
||||
|
@ -2429,6 +2429,8 @@ define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y)
|
|||
ret <3 x i8> %r
|
||||
}
|
||||
|
||||
; negative test - must have identical masks
|
||||
|
||||
define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_mask(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
|
||||
|
@ -2442,6 +2444,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
|
|||
ret <3 x i8> %r
|
||||
}
|
||||
|
||||
; negative test - must be unary shuffles
|
||||
|
||||
define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
|
||||
; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_shuf(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Z:%.*]], <3 x i32> <i32 1, i32 0, i32 3>
|
||||
|
@ -2455,6 +2459,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3
|
|||
ret <3 x i8> %r
|
||||
}
|
||||
|
||||
; negative test - too many uses
|
||||
|
||||
define <3 x i8> @smin_unary_shuffle_ops_uses(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @smin_unary_shuffle_ops_uses(
|
||||
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
|
||||
|
|
Loading…
Reference in New Issue