Implement branch-free single-division lowering of affine division/remainder

This implements the lowering of `floordiv`, `ceildiv` and `mod` operators from affine expressions to the arithmetic primitive operations. Integer division rules in affine expressions explicitly require rounding towards either negative or positive infinity, unlike machine implementations that round towards zero. In the general case, implementing `floordiv` and `ceildiv` using machine signed division requires computing both the quotient and the remainder. When the divisor is positive, this can be simplified by adjusting the dividend and the quotient by one and switching signs. In the current use cases, we are unlikely to encounter affine expressions with negative divisors (affine divisions appear in loop transformations such as tiling that guarantee that divisors are positive by construction). Therefore, it is reasonable to use a branch-free single-division implementation. In the case of affine maps, divisors can only be literals, so we can check the sign and implement the case for negative divisors when the need arises. The affine lowering pass can still fail when applied to semi-affine maps (division or modulo by a symbol). PiperOrigin-RevId: 228668181
2019-01-10 01:44:32 -08:00 · 2019-01-10 01:44:32 -08:00 · 9003490287
parent 56b99b4045
commit 9003490287
3 changed files with 284 additions and 12 deletions
--- a/mlir/lib/Transforms/LowerAffine.cpp
+++ b/mlir/lib/Transforms/LowerAffine.cpp
@ -61,22 +61,126 @@ public:
    return buildBinaryExpr<MulIOp>(expr);
  }

-  // TODO(zinenko): implement when the standard operators are made available.
-  Value *visitModExpr(AffineBinaryOpExpr) {
-    builder.getContext()->emitError(loc, "unsupported binary operator: mod");
-    return nullptr;
+  // Euclidean modulo operation: non-positive RHS is not allowed.
+  // Remainder of the euclidean integer division is always non-negative.
+  //
+  // Implemented as
+  //
+  //     a mod b =
+  //         let remainder = srem a, b;
+  //             negative = remainder < 0 in
+  //         select negative, remainder + b, remainder.
+  //
+  // Emits an error and returns nullptr if the RHS is not a positive constant.
+  // Returns nullptr without a new diagnostic if lowering an operand failed.
+  Value *visitModExpr(AffineBinaryOpExpr expr) {
+    auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
+    if (!rhsConst) {
+      builder.getContext()->emitError(
+          loc,
+          "semi-affine expressions (modulo by non-const) are not supported");
+      return nullptr;
+    }
+    if (rhsConst.getValue() <= 0) {
+      builder.getContext()->emitError(
+          loc, "modulo by non-positive value is not supported");
+      return nullptr;
+    }
+
+    auto lhs = visit(expr.getLHS());
+    auto rhs = visit(expr.getRHS());
+    // A visitor signals failure by returning nullptr (see the error paths
+    // above), e.g. for a nested semi-affine operand.  Propagate the failure
+    // instead of creating operations on null values.
+    if (!lhs || !rhs)
+      return nullptr;
+
+    // The signed remainder may be negative; adding the (positive) divisor once
+    // makes it non-negative.  The test is on the remainder, not on the
+    // dividend, so that exact multiples of the divisor still yield zero.
+    Value *remainder = builder.create<RemISOp>(loc, lhs, rhs);
+    Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
+    Value *isRemainderNegative =
+        builder.create<CmpIOp>(loc, CmpIPredicate::SLT, remainder, zeroCst);
+    Value *correctedRemainder = builder.create<AddIOp>(loc, remainder, rhs);
+    Value *result = builder.create<SelectOp>(loc, isRemainderNegative,
+                                             correctedRemainder, remainder);
+    return result;
  }

-  Value *visitFloorDivExpr(AffineBinaryOpExpr) {
-    builder.getContext()->emitError(loc,
-                                    "unsupported binary operator: floor_div");
-    return nullptr;
+  // Floor division operation (rounds towards negative infinity).
+  //
+  // For positive divisors, it can be implemented without branching and with a
+  // single division instruction as
+  //
+  //        a floordiv b =
+  //            let negative = a < 0 in
+  //            let absolute = negative ? -a - 1 : a in
+  //            let quotient = absolute / b in
+  //                negative ? -quotient - 1 : quotient
+  //
+  // Emits an error and returns nullptr if the RHS is not a positive constant.
+  // Returns nullptr without a new diagnostic if lowering an operand failed.
+  Value *visitFloorDivExpr(AffineBinaryOpExpr expr) {
+    auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
+    if (!rhsConst) {
+      builder.getContext()->emitError(
+          loc,
+          "semi-affine expressions (division by non-const) are not supported");
+      return nullptr;
+    }
+    if (rhsConst.getValue() <= 0) {
+      builder.getContext()->emitError(
+          loc, "division by non-positive value is not supported");
+      return nullptr;
+    }
+
+    auto lhs = visit(expr.getLHS());
+    auto rhs = visit(expr.getRHS());
+    // A visitor signals failure by returning nullptr (see the error paths
+    // above), e.g. for a nested semi-affine operand.  Propagate the failure
+    // instead of creating operations on null values.
+    if (!lhs || !rhs)
+      return nullptr;
+
+    Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
+    // Constant -1: "-1 - x" computes "-x - 1" with a single subtraction.
+    Value *noneCst = builder.create<ConstantIndexOp>(loc, -1);
+    Value *negative =
+        builder.create<CmpIOp>(loc, CmpIPredicate::SLT, lhs, zeroCst);
+    Value *negatedDecremented = builder.create<SubIOp>(loc, noneCst, lhs);
+    // The selected dividend is non-negative in both cases, so the machine
+    // division below does not depend on the rounding of negative quotients.
+    Value *dividend =
+        builder.create<SelectOp>(loc, negative, negatedDecremented, lhs);
+    Value *quotient = builder.create<DivISOp>(loc, dividend, rhs);
+    Value *correctedQuotient = builder.create<SubIOp>(loc, noneCst, quotient);
+    Value *result =
+        builder.create<SelectOp>(loc, negative, correctedQuotient, quotient);
+    return result;
  }

-  Value *visitCeilDivExpr(AffineBinaryOpExpr) {
-    builder.getContext()->emitError(loc,
-                                    "unsupported binary operator: ceil_div");
-    return nullptr;
+  // Ceiling division operation (rounds towards positive infinity).
+  //
+  // For positive divisors, it can be implemented without branching and with a
+  // single division instruction as
+  //
+  //     a ceildiv b =
+  //         let negative = a <= 0 in
+  //         let absolute = negative ? -a : a - 1 in
+  //         let quotient = absolute / b in
+  //             negative ? -quotient : quotient + 1
+  //
+  // Emits an error and returns nullptr if the RHS is not a positive constant.
+  // Returns nullptr without a new diagnostic if lowering an operand failed.
+  Value *visitCeilDivExpr(AffineBinaryOpExpr expr) {
+    auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
+    if (!rhsConst) {
+      builder.getContext()->emitError(
+          loc,
+          "semi-affine expressions (division by non-const) are not supported");
+      return nullptr;
+    }
+    if (rhsConst.getValue() <= 0) {
+      builder.getContext()->emitError(
+          loc, "division by non-positive value is not supported");
+      return nullptr;
+    }
+    auto lhs = visit(expr.getLHS());
+    auto rhs = visit(expr.getRHS());
+    // A visitor signals failure by returning nullptr (see the error paths
+    // above), e.g. for a nested semi-affine operand.  Propagate the failure
+    // instead of creating operations on null values.
+    if (!lhs || !rhs)
+      return nullptr;
+
+    Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
+    Value *oneCst = builder.create<ConstantIndexOp>(loc, 1);
+    Value *nonPositive =
+        builder.create<CmpIOp>(loc, CmpIPredicate::SLE, lhs, zeroCst);
+    Value *negated = builder.create<SubIOp>(loc, zeroCst, lhs);
+    Value *decremented = builder.create<SubIOp>(loc, lhs, oneCst);
+    // The selected dividend is non-negative in both cases (-a for a <= 0,
+    // a - 1 for a > 0), so a single machine division suffices.
+    Value *dividend =
+        builder.create<SelectOp>(loc, nonPositive, negated, decremented);
+    Value *quotient = builder.create<DivISOp>(loc, dividend, rhs);
+    Value *negatedQuotient = builder.create<SubIOp>(loc, zeroCst, quotient);
+    Value *incrementedQuotient = builder.create<AddIOp>(loc, quotient, oneCst);
+    Value *result = builder.create<SelectOp>(loc, nonPositive, negatedQuotient,
+                                             incrementedQuotient);
+    return result;
  }

  Value *visitConstantExpr(AffineConstantExpr expr) {
--- a/mlir/test/Transforms/constant-fold.mlir
+++ b/mlir/test/Transforms/constant-fold.mlir
@ -253,3 +253,99 @@ func @cmpi() -> (i1, i1, i1, i1, i1, i1, i1, i1, i1, i1) {
  %9 = cmpi "uge", %c42, %cm1 : i32
  return %0, %1, %2, %3, %4, %5, %6, %7, %8, %9 : i1, i1, i1, i1, i1, i1, i1, i1, i1, i1
 }
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: the operations in this test are exactly those produced by
+// lowering affine_apply (i) -> (i mod 42) to standard operations.  Please only
+// change these operations together with the affine lowering pass tests.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: @lowered_affine_mod
+func @lowered_affine_mod() -> (index, index) {
+  // -43 mod 42 == 41: the signed remainder is -1, so the divisor is added.
+// CHECK-NEXT: {{.*}} = constant 41 : index
+  %c-43 = constant -43 : index
+  %c42 = constant 42 : index
+  %0 = remis %c-43, %c42 : index
+  %c0 = constant 0 : index
+  %1 = cmpi "slt", %0, %c0 : index
+  %2 = addi %0, %c42 : index
+  %3 = select %1, %2, %0 : index
+  // 43 mod 42 == 1: the signed remainder is already non-negative.
+// CHECK-NEXT: {{.*}} = constant 1 : index
+  %c43 = constant 43 : index
+  %c42_0 = constant 42 : index
+  %4 = remis %c43, %c42_0 : index
+  %c0_1 = constant 0 : index
+  %5 = cmpi "slt", %4, %c0_1 : index
+  %6 = addi %4, %c42_0 : index
+  %7 = select %5, %6, %4 : index
+  return %3, %7 : index, index
+}
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: the operations in this test are exactly those produced by
+// lowering affine_apply (i) -> (i floordiv 42) to standard operations.  Please
+// only change these operations together with the affine lowering pass tests.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: func @lowered_affine_floordiv
+func @lowered_affine_floordiv() -> (index, index) {
+// CHECK-NEXT: %c-2 = constant -2 : index
+  %c-43 = constant -43 : index  // -43 floordiv 42 == -2
+  %c42 = constant 42 : index
+  %c0 = constant 0 : index
+  %c-1 = constant -1 : index
+  %0 = cmpi "slt", %c-43, %c0 : index
+  %1 = subi %c-1, %c-43 : index
+  %2 = select %0, %1, %c-43 : index
+  %3 = divis %2, %c42 : index
+  %4 = subi %c-1, %3 : index
+  %5 = select %0, %4, %3 : index
+// CHECK-NEXT: %c1 = constant 1 : index
+  %c43 = constant 43 : index  // 43 floordiv 42 == 1
+  %c42_0 = constant 42 : index
+  %c0_1 = constant 0 : index
+  %c-1_2 = constant -1 : index
+  %6 = cmpi "slt", %c43, %c0_1 : index
+  %7 = subi %c-1_2, %c43 : index
+  %8 = select %6, %7, %c43 : index
+  %9 = divis %8, %c42_0 : index
+  %10 = subi %c-1_2, %9 : index
+  %11 = select %6, %10, %9 : index
+  return %5, %11 : index, index
+}
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: the operations in this test are exactly those produced by
+// lowering affine_apply (i) -> (i ceildiv 42) to standard operations.  Please
+// only change these operations together with the affine lowering pass tests.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: func @lowered_affine_ceildiv
+func @lowered_affine_ceildiv() -> (index, index) {
+// CHECK-NEXT:  %c-1 = constant -1 : index
+  %c-43 = constant -43 : index  // -43 ceildiv 42 == -1
+  %c42 = constant 42 : index
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %0 = cmpi "sle", %c-43, %c0 : index
+  %1 = subi %c0, %c-43 : index
+  %2 = subi %c-43, %c1 : index
+  %3 = select %0, %1, %2 : index
+  %4 = divis %3, %c42 : index
+  %5 = subi %c0, %4 : index
+  %6 = addi %4, %c1 : index
+  %7 = select %0, %5, %6 : index
+// CHECK-NEXT:  %c2 = constant 2 : index
+  %c43 = constant 43 : index  // 43 ceildiv 42 == 2
+  %c42_0 = constant 42 : index
+  %c0_1 = constant 0 : index
+  %c1_2 = constant 1 : index
+  %8 = cmpi "sle", %c43, %c0_1 : index
+  %9 = subi %c0_1, %c43 : index
+  %10 = subi %c43, %c1_2 : index
+  %11 = select %8, %9, %10 : index
+  %12 = divis %11, %c42_0 : index
+  %13 = subi %c0_1, %12 : index
+  %14 = addi %12, %c1_2 : index
+  %15 = select %8, %13, %14 : index
+  return %7, %15 : index, index
+}
+
+
--- a/mlir/test/Transforms/lower-affine.mlir
+++ b/mlir/test/Transforms/lower-affine.mlir
@ -576,3 +576,75 @@ func @args_ret_affine_apply(index, index) -> (index, index) {
  %11 = affine_apply #map1 ()[%1]
  return %00, %11 : index, index
 }
+
+//===---------------------------------------------------------------------===//
+// Test lowering of Euclidean (floor) division, ceil division and modulo
+// operation used in affine expressions.  In addition to testing the
+// instruction-level output, check that the obtained results are correct by
+// applying constant folding transformation after affine lowering.
+//===---------------------------------------------------------------------===//
+
+#mapmod = (i) -> (i mod 42)
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_mod
+// test in "constant-fold.mlir": its input operations must be exactly the
+// output of the affine_apply lowering checked here.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: func @affine_apply_mod
+func @affine_apply_mod(%arg0 : index) -> (index) {
+// CHECK-NEXT: %c42 = constant 42 : index
+// CHECK-NEXT: %0 = remis %arg0, %c42 : index
+// CHECK-NEXT: %c0 = constant 0 : index
+// CHECK-NEXT: %1 = cmpi "slt", %0, %c0 : index
+// CHECK-NEXT: %2 = addi %0, %c42 : index
+// CHECK-NEXT: %3 = select %1, %2, %0 : index
+  %0 = affine_apply #mapmod (%arg0)
+  return %0 : index
+}
+
+#mapfloordiv = (i) -> (i floordiv 42)
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: if you change this test, also change the
+// @lowered_affine_floordiv test in "constant-fold.mlir": its input operations
+// must be exactly the output of the affine_apply lowering checked here.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: func @affine_apply_floordiv
+func @affine_apply_floordiv(%arg0 : index) -> (index) {
+// CHECK-NEXT: %c42 = constant 42 : index
+// CHECK-NEXT: %c0 = constant 0 : index
+// CHECK-NEXT: %c-1 = constant -1 : index
+// CHECK-NEXT: %0 = cmpi "slt", %arg0, %c0 : index
+// CHECK-NEXT: %1 = subi %c-1, %arg0 : index
+// CHECK-NEXT: %2 = select %0, %1, %arg0 : index
+// CHECK-NEXT: %3 = divis %2, %c42 : index
+// CHECK-NEXT: %4 = subi %c-1, %3 : index
+// CHECK-NEXT: %5 = select %0, %4, %3 : index
+  %0 = affine_apply #mapfloordiv (%arg0)
+  return %0 : index
+}
+
+#mapceildiv = (i) -> (i ceildiv 42)
+
+// --------------------------------------------------------------------------//
+// IMPORTANT NOTE: if you change this test, also change the
+// @lowered_affine_ceildiv test in "constant-fold.mlir": its input operations
+// must be exactly the output of the affine_apply lowering checked here.
+// --------------------------------------------------------------------------//
+// CHECK-LABEL: func @affine_apply_ceildiv
+func @affine_apply_ceildiv(%arg0 : index) -> (index) {
+// CHECK-NEXT:  %c42 = constant 42 : index
+// CHECK-NEXT:  %c0 = constant 0 : index
+// CHECK-NEXT:  %c1 = constant 1 : index
+// CHECK-NEXT:  %0 = cmpi "sle", %arg0, %c0 : index
+// CHECK-NEXT:  %1 = subi %c0, %arg0 : index
+// CHECK-NEXT:  %2 = subi %arg0, %c1 : index
+// CHECK-NEXT:  %3 = select %0, %1, %2 : index
+// CHECK-NEXT:  %4 = divis %3, %c42 : index
+// CHECK-NEXT:  %5 = subi %c0, %4 : index
+// CHECK-NEXT:  %6 = addi %4, %c1 : index
+// CHECK-NEXT:  %7 = select %0, %5, %6 : index
+  %0 = affine_apply #mapceildiv (%arg0)
+  return %0 : index
+}