[InstCombine] reduce demand-limited bool math to logic

The cmp math test is inspired by memcmp() patterns seen in D75840.
There is at least one related fold we could do here when both
values are sext'd, but I don't see a way to generalize further.

We have some other bool math patterns that we want to reduce, but
that might require fixing the bogus transforms noted in D72396.

Alive proof translations of the regression tests:
https://rise4fun.com/Alive/zGWi

  Name: demand add 1
  %xz = zext i1 %x to i32
  %ys = sext i1 %y to i32
  %sub = add i32 %xz, %ys
  %r = lshr i32 %sub, 31
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = zext i1 %and to i32

  Name: demand add 2
  %xz = zext i1 %x to i5
  %ys = sext i1 %y to i5
  %sub = add i5 %xz, %ys
  %r = and i5 %sub, 16
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = select i1 %and, i5 -16, i5 0

  Name: demand add 3
  %xz = zext i1 %x to i8
  %ys = sext i1 %y to i8
  %a = add i8 %ys, %xz
  %r = ashr i8 %a, 7
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = sext i1 %and to i8

  Name: cmp math
  %gt = icmp ugt i32 %x, %y
  %lt = icmp ult i32 %x, %y
  %xz = zext i1 %gt to i32
  %yz = zext i1 %lt to i32
  %s = sub i32 %xz, %yz
  %r = lshr i32 %s, 31
  =>
  %r = zext i1 %lt to i32

Differential Revision: https://reviews.llvm.org/D75961
This commit is contained in:
Sanjay Patel 2020-03-11 14:35:31 -04:00
parent fa8c4c7ffa
commit fae900921b
2 changed files with 40 additions and 19 deletions

View File

@ -454,6 +454,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Add:
if ((DemandedMask & 1) == 0) {
// If we do not need the low bit, try to convert bool math to logic:
// add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
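// Truth table for inputs and output signbits:
//       X:0 | X:1
//      ----------
// Y:0  |  0 | 0 |
// Y:1  | -1 | 0 |
//      ----------
// The sum is always -1, 0, or 1, so every bit above the (undemanded) low
// bit equals the sign bit; matching the sign bit is therefore sufficient.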
Value *X, *Y;
if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
m_OneUse(m_SExt(m_Value(Y))))) &&
X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(I);
Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
return Builder.CreateSExt(AndNot, VTy);
}
}
LLVM_FALLTHROUGH;
case Instruction::Sub: {
/// If the high-bits of an ADD/SUB are not demanded, then we do not care
/// about the high bits of the operands.

View File

@ -1055,10 +1055,9 @@ define <2 x i32> @test44_vec_non_splat(<2 x i32> %A) {
define i32 @lshr_add(i1 %x, i1 %y) {
; CHECK-LABEL: @lshr_add(
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
; CHECK-NEXT: [[YS:%.*]] = sext i1 [[Y:%.*]] to i32
; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[XZ]], [[YS]]
; CHECK-NEXT: [[R:%.*]] = lshr i32 [[SUB]], 31
; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X:%.*]], true
; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%xz = zext i1 %x to i32
@ -1070,10 +1069,9 @@ define i32 @lshr_add(i1 %x, i1 %y) {
define i5 @and_add(i1 %x, i1 %y) {
; CHECK-LABEL: @and_add(
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i5
; CHECK-NEXT: [[YS:%.*]] = sext i1 [[Y:%.*]] to i5
; CHECK-NEXT: [[SUB:%.*]] = add nsw i5 [[XZ]], [[YS]]
; CHECK-NEXT: [[R:%.*]] = and i5 [[SUB]], -2
; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X:%.*]], true
; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i5 -2, i5 0
; CHECK-NEXT: ret i5 [[R]]
;
%xz = zext i1 %x to i5
@ -1085,11 +1083,10 @@ define i5 @and_add(i1 %x, i1 %y) {
define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) {
; CHECK-LABEL: @ashr_add_commute(
; CHECK-NEXT: [[XZ:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i8>
; CHECK-NEXT: [[YS:%.*]] = sext <2 x i1> [[Y:%.*]] to <2 x i8>
; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i8> [[YS]], [[XZ]]
; CHECK-NEXT: [[R:%.*]] = ashr <2 x i8> [[SUB]], <i8 1, i8 1>
; CHECK-NEXT: ret <2 x i8> [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i8>
; CHECK-NEXT: ret <2 x i8> [[TMP3]]
;
%xz = zext <2 x i1> %x to <2 x i8>
%ys = sext <2 x i1> %y to <2 x i8>
@ -1100,12 +1097,8 @@ define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) {
define i32 @cmp_math(i32 %x, i32 %y) {
; CHECK-LABEL: @cmp_math(
; CHECK-NEXT: [[GT:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[LT:%.*]] = icmp ult i32 [[X]], [[Y]]
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[GT]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = sext i1 [[LT]] to i32
; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[XZ]], [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 31
; CHECK-NEXT: [[LT:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = zext i1 [[LT]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%gt = icmp ugt i32 %x, %y
@ -1117,6 +1110,8 @@ define i32 @cmp_math(i32 %x, i32 %y) {
ret i32 %r
}
; Negative test - wrong type
define i32 @lshr_add_nonbool(i2 %x, i1 %y) {
; CHECK-LABEL: @lshr_add_nonbool(
; CHECK-NEXT: [[XZ:%.*]] = zext i2 [[X:%.*]] to i32
@ -1132,6 +1127,8 @@ define i32 @lshr_add_nonbool(i2 %x, i1 %y) {
ret i32 %r
}
; Negative test - wrong demand
define i32 @and31_add(i1 %x, i1 %y) {
; CHECK-LABEL: @and31_add(
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
@ -1147,6 +1144,8 @@ define i32 @and31_add(i1 %x, i1 %y) {
ret i32 %r
}
; Negative test - extra use
define i32 @lshr_add_use(i1 %x, i1 %y, i32* %p) {
; CHECK-LABEL: @lshr_add_use(
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
@ -1164,6 +1163,8 @@ define i32 @lshr_add_use(i1 %x, i1 %y, i32* %p) {
ret i32 %r
}
; Negative test - extra use
define i32 @lshr_add_use2(i1 %x, i1 %y, i32* %p) {
; CHECK-LABEL: @lshr_add_use2(
; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32