From 790c29ab86d3c2e46a228ac32e3d10e18afd4d65 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 12 Aug 2021 12:35:39 -0400 Subject: [PATCH] [InstCombine] fold umax/umin intrinsics based on demanded bits This is a direct translation of the select folds added with D53033 / D53036 and another step towards canonicalization using the intrinsics (see D98152). --- .../InstCombineSimplifyDemanded.cpp | 23 +++++++++++++++++++ .../InstCombine/minmax-intrinsics.ll | 12 ++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 15b51ae8a5ee..502bd15f9bad 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -829,6 +829,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBitsComputed = true; break; } + case Intrinsic::umax: { + // UMax(A, C) == A if ... + // The lowest non-zero bit of DemandedMask is higher than the highest + // non-zero bit of C. + const APInt *C; + unsigned CTZ = DemandedMask.countTrailingZeros(); + if (match(II->getArgOperand(1), m_APInt(C)) && + CTZ >= C->getActiveBits()) + return II->getArgOperand(0); + break; + } + case Intrinsic::umin: { + // UMin(A, C) == A if ... + // The lowest non-zero bit of DemandedMask is higher than the highest + // non-one bit of C. + // This comes from using De Morgan's laws on the above umax example. 
+ const APInt *C; + unsigned CTZ = DemandedMask.countTrailingZeros(); + if (match(II->getArgOperand(1), m_APInt(C)) && + CTZ >= C->getBitWidth() - C->countLeadingOnes()) + return II->getArgOperand(0); + break; + } default: { // Handle target specific intrinsics Optional V = targetSimplifyDemandedUseBitsIntrinsic( diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index b41b68a565b0..8693526f198c 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -976,8 +976,7 @@ define i8 @smax_uses(i8 %x, i8 %y, i8 %z) { define i8 @umax_demand_lshr(i8 %x) { ; CHECK-LABEL: @umax_demand_lshr( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 15) -; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 4 +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[X:%.*]], 4 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 15) @@ -987,8 +986,7 @@ define i8 @umax_demand_lshr(i8 %x) { define i8 @umax_demand_and(i8 %x) { ; CHECK-LABEL: @umax_demand_and( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 1) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], 10 +; CHECK-NEXT: [[R:%.*]] = and i8 [[X:%.*]], 10 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.umax.i8(i8 1, i8 %x) @@ -998,8 +996,7 @@ define i8 @umax_demand_and(i8 %x) { define i8 @umin_demand_or_31_30(i8 %x) { ; CHECK-LABEL: @umin_demand_or_31_30( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 -30) -; CHECK-NEXT: [[R:%.*]] = or i8 [[M]], 31 +; CHECK-NEXT: [[R:%.*]] = or i8 [[X:%.*]], 31 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.umin.i8(i8 -30, i8 %x) @@ -1009,8 +1006,7 @@ define i8 @umin_demand_or_31_30(i8 %x) { define i8 @umin_demand_and_7_8(i8 %x) { ; CHECK-LABEL: @umin_demand_and_7_8( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 -7) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -8 +; CHECK-NEXT: [[R:%.*]] = and i8 [[X:%.*]], -8 ; 
CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 -7)