[CVP] Enhance SRem -> URem fold to work not just on non-negative operands

This is a continuation of 8d487668d0, the logic is pretty much identical for SRem: Name: pos pos Pre: C0 >= 0 && C1 >= 0 %r = srem i8 C0, C1 => %r = urem i8 C0, C1 Name: pos neg Pre: C0 >= 0 && C1 <= 0 %r = srem i8 C0, C1 => %r = urem i8 C0, -C1 Name: neg pos Pre: C0 <= 0 && C1 >= 0 %r = srem i8 C0, C1 => %t0 = urem i8 -C0, C1 %r = sub i8 0, %t0 Name: neg neg Pre: C0 <= 0 && C1 <= 0 %r = srem i8 C0, C1 => %t0 = urem i8 -C0, -C1 %r = sub i8 0, %t0 https://rise4fun.com/Alive/Vd6 Now, this new logic does not result in any new catches as of vanilla llvm test-suite + RawSpeed. but it should be virtually compile-time free, and it may be important to be consistent in their handling, because if we had a pair of sdiv-srem, and only converted one of them, -divrempairs will no longer see them as a pair, and thus not "merge" them.
2020-09-22 10:37:15 +03:00 · 2020-09-22 10:37:15 +03:00 · 4eeeb356fc
parent 36ea18b064
commit 4eeeb356fc
2 changed files with 63 additions and 27 deletions
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@ -613,10 +613,15 @@ static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
  return Result == LazyValueInfo::True;
 }

-static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  return all_of(SDI->operands(),
-                [&](Value *Op) { return isNonNegative(Op, LVI, SDI); });
-}
+enum class Domain { NonNegative, NonPositive, Unknown };
+
+Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+  if (isNonNegative(V, LVI, CxtI))
+    return Domain::NonNegative;
+  if (isNonPositive(V, LVI, CxtI))
+    return Domain::NonPositive;
+  return Domain::Unknown;
+};

 /// Try to shrink a udiv/urem's width down to the smallest power of two that's
 /// sufficient to contain its operands.
@ -661,18 +666,51 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
 }

 static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI))
+  if (SDI->getType()->isVectorTy())
    return false;

+  struct Operand {
+    Value *V;
+    Domain D;
+  };
+  std::array<Operand, 2> Ops;
+
+  for (const auto I : zip(Ops, SDI->operands())) {
+    Operand &Op = std::get<0>(I);
+    Op.V = std::get<1>(I);
+    Op.D = getDomain(Op.V, LVI, SDI);
+    if (Op.D == Domain::Unknown)
+      return false;
+  }
+
+  // We know domains of both of the operands!
  ++NumSRems;
-  auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
-                                        SDI->getName(), SDI);
-  BO->setDebugLoc(SDI->getDebugLoc());
-  SDI->replaceAllUsesWith(BO);
+
+  // We need operands to be non-negative, so negate each one that isn't.
+  for (Operand &Op : Ops) {
+    if (Op.D == Domain::NonNegative)
+      continue;
+    auto *BO =
+        BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+    BO->setDebugLoc(SDI->getDebugLoc());
+    Op.V = BO;
+  }
+
+  auto *URem =
+      BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+  URem->setDebugLoc(SDI->getDebugLoc());
+
+  Value *Res = URem;
+
+  // If the divident was non-positive, we need to negate the result.
+  if (Ops[0].D == Domain::NonPositive)
+    Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+  SDI->replaceAllUsesWith(Res);
  SDI->eraseFromParent();

-  // Try to process our new urem.
-  processUDivOrURem(BO, LVI);
+  // Try to simplify our new urem.
+  processUDivOrURem(URem, LVI);

  return true;
 }
@ -686,24 +724,16 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
  if (SDI->getType()->isVectorTy())
    return false;

-  enum class Domain { NonNegative, NonPositive, Unknown };
-  auto getDomain = [&](Value *V) {
-    if (isNonNegative(V, LVI, SDI))
-      return Domain::NonNegative;
-    if (isNonPositive(V, LVI, SDI))
-      return Domain::NonPositive;
-    return Domain::Unknown;
-  };
-
  struct Operand {
    Value *V;
    Domain D;
  };
  std::array<Operand, 2> Ops;
+
  for (const auto I : zip(Ops, SDI->operands())) {
    Operand &Op = std::get<0>(I);
    Op.V = std::get<1>(I);
-    Op.D = getDomain(Op.V);
+    Op.D = getDomain(Op.V, LVI, SDI);
    if (Op.D == Domain::Unknown)
      return false;
  }
--- a/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll
@ -90,8 +90,9 @@ define i8 @test6_pos_neg(i8 %x, i8 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sle i8 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[REM:%.*]] = srem i8 [[X]], [[Y]]
-; CHECK-NEXT:    ret i8 [[REM]]
+; CHECK-NEXT:    [[Y_NONNEG:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT:    [[REM1:%.*]] = urem i8 [[X]], [[Y_NONNEG]]
+; CHECK-NEXT:    ret i8 [[REM1]]
 ;
  %c0 = icmp sge i8 %x, 0
  call void @llvm.assume(i1 %c0)
@ -107,8 +108,10 @@ define i8 @test7_neg_pos(i8 %x, i8 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sge i8 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[REM:%.*]] = srem i8 [[X]], [[Y]]
-; CHECK-NEXT:    ret i8 [[REM]]
+; CHECK-NEXT:    [[X_NONNEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[REM1:%.*]] = urem i8 [[X_NONNEG]], [[Y]]
+; CHECK-NEXT:    [[REM1_NEG:%.*]] = sub i8 0, [[REM1]]
+; CHECK-NEXT:    ret i8 [[REM1_NEG]]
 ;
  %c0 = icmp sle i8 %x, 0
  call void @llvm.assume(i1 %c0)
@ -124,8 +127,11 @@ define i8 @test8_neg_neg(i8 %x, i8 %y) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C0]])
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sle i8 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT:    [[REM:%.*]] = srem i8 [[X]], [[Y]]
-; CHECK-NEXT:    ret i8 [[REM]]
+; CHECK-NEXT:    [[X_NONNEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[Y_NONNEG:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT:    [[REM1:%.*]] = urem i8 [[X_NONNEG]], [[Y_NONNEG]]
+; CHECK-NEXT:    [[REM1_NEG:%.*]] = sub i8 0, [[REM1]]
+; CHECK-NEXT:    ret i8 [[REM1_NEG]]
 ;
  %c0 = icmp sle i8 %x, 0
  call void @llvm.assume(i1 %c0)