Implement branch-free single-division lowering of affine division/remainder

This implements the lowering of `floordiv`, `ceildiv` and `mod` operators from
affine expressions to the arithmetic primitive operations.  Integer division
rules in affine expressions explicitly require rounding towards either negative
or positive infinity unlike machine implementations that round towards zero.
In the general case, implementing `floordiv` and `ceildiv` using machine signed
division requires computing both the quotient and the remainder.  When the
divisor is positive, this can be simplified by adjusting the dividend and the
quotient by one and switching signs.

In the current use cases, we are unlikely to encounter affine expressions with
negative divisors (affine divisions appear in loop transformations such as
tiling that guarantee that divisors are positive by construction).  Therefore,
it is reasonable to use a branch-free single-division implementation.  In the
case of affine maps, divisors can only be literals, so we can check the sign
and implement the case for negative divisors when the need arises.

The affine lowering pass can still fail when applied to semi-affine maps
(division or modulo by a symbol).

PiperOrigin-RevId: 228668181
This commit is contained in:
Alex Zinenko 2019-01-10 01:44:32 -08:00 committed by jpienaar
parent 56b99b4045
commit 9003490287
3 changed files with 284 additions and 12 deletions

View File

@ -61,22 +61,126 @@ public:
return buildBinaryExpr<MulIOp>(expr);
}
// TODO(zinenko): implement when the standard operators are made available.
Value *visitModExpr(AffineBinaryOpExpr) {
builder.getContext()->emitError(loc, "unsupported binary operator: mod");
return nullptr;
// Euclidean modulo operation: negative RHS is not allowed.
// Remainder of the euclidean integer division is always non-negative.
//
// Implemented as
//
// a mod b =
// let remainder = srem a, b;
// negative = remainder < 0 in
// select negative, remainder + b, remainder.
Value *visitModExpr(AffineBinaryOpExpr expr) {
  // Only a literal divisor is accepted; any other right-hand side makes the
  // expression semi-affine, which is rejected with a diagnostic.
  auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
  if (!rhsConst) {
    builder.getContext()->emitError(
        loc,
        "semi-affine expressions (modulo by non-const) are not supported");
    return nullptr;
  }
  // The remainder correction below is only valid for a strictly positive
  // divisor.
  if (rhsConst.getValue() <= 0) {
    builder.getContext()->emitError(
        loc, "modulo by non-positive value is not supported");
    return nullptr;
  }

  auto lhs = visit(expr.getLHS());
  auto rhs = visit(expr.getRHS());
  // Lowering an operand can itself fail: the visitors return nullptr after
  // emitting a diagnostic (e.g. for a nested modulo by a non-constant).
  // Propagate that failure rather than asserting, so that builds without
  // assertions never pass a null operand to the builder.
  if (!lhs || !rhs)
    return nullptr;

  // The signed remainder may be negative; when it is, adding the (positive)
  // divisor yields the non-negative result required by `mod`.
  Value *remainder = builder.create<RemISOp>(loc, lhs, rhs);
  Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
  Value *isRemainderNegative =
      builder.create<CmpIOp>(loc, CmpIPredicate::SLT, remainder, zeroCst);
  Value *correctedRemainder = builder.create<AddIOp>(loc, remainder, rhs);
  Value *result = builder.create<SelectOp>(loc, isRemainderNegative,
                                           correctedRemainder, remainder);
  return result;
}
Value *visitFloorDivExpr(AffineBinaryOpExpr) {
builder.getContext()->emitError(loc,
"unsupported binary operator: floor_div");
return nullptr;
// Floor division operation (rounds towards negative infinity).
//
// For positive divisors, it can be implemented without branching and with a
// single division instruction as
//
// a floordiv b =
// let negative = a < 0 in
// let absolute = negative ? -a - 1 : a in
// let quotient = absolute / b in
// negative ? -quotient - 1 : quotient
Value *visitFloorDivExpr(AffineBinaryOpExpr expr) {
  // Only a literal divisor is accepted; any other right-hand side makes the
  // expression semi-affine, which is rejected with a diagnostic.
  auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
  if (!rhsConst) {
    builder.getContext()->emitError(
        loc,
        "semi-affine expressions (division by non-const) are not supported");
    return nullptr;
  }
  // The single-division scheme below is only valid for a strictly positive
  // divisor.
  if (rhsConst.getValue() <= 0) {
    builder.getContext()->emitError(
        loc, "division by non-positive value is not supported");
    return nullptr;
  }

  auto lhs = visit(expr.getLHS());
  auto rhs = visit(expr.getRHS());
  // Lowering an operand can itself fail: the visitors return nullptr after
  // emitting a diagnostic (e.g. for a nested division by a non-constant).
  // Propagate that failure rather than asserting, so that builds without
  // assertions never pass a null operand to the builder.
  if (!lhs || !rhs)
    return nullptr;

  Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
  Value *noneCst = builder.create<ConstantIndexOp>(loc, -1);
  Value *negative =
      builder.create<CmpIOp>(loc, CmpIPredicate::SLT, lhs, zeroCst);
  // (-1 - a) is the "-a - 1" term of the scheme: a non-negative dividend to
  // use in place of a negative `a`.
  Value *negatedDecremented = builder.create<SubIOp>(loc, noneCst, lhs);
  Value *dividend =
      builder.create<SelectOp>(loc, negative, negatedDecremented, lhs);
  Value *quotient = builder.create<DivISOp>(loc, dividend, rhs);
  // (-1 - q) is the "-quotient - 1" term: it maps the quotient of the
  // adjusted dividend back to the floor of the original division.
  Value *correctedQuotient = builder.create<SubIOp>(loc, noneCst, quotient);
  Value *result =
      builder.create<SelectOp>(loc, negative, correctedQuotient, quotient);
  return result;
}
Value *visitCeilDivExpr(AffineBinaryOpExpr) {
builder.getContext()->emitError(loc,
"unsupported binary operator: ceil_div");
return nullptr;
// Ceiling division operation (rounds towards positive infinity).
//
// For positive divisors, it can be implemented without branching and with a
// single division instruction as
//
// a ceildiv b =
// let negative = a <= 0 in
// let absolute = negative ? -a : a - 1 in
// let quotient = absolute / b in
// negative ? -quotient : quotient + 1
Value *visitCeilDivExpr(AffineBinaryOpExpr expr) {
  // Only a literal divisor is accepted; any other right-hand side makes the
  // expression semi-affine, which is rejected with a diagnostic.
  auto rhsConst = expr.getRHS().dyn_cast<AffineConstantExpr>();
  if (!rhsConst) {
    builder.getContext()->emitError(
        loc,
        "semi-affine expressions (division by non-const) are not supported");
    return nullptr;
  }
  // The single-division scheme below is only valid for a strictly positive
  // divisor.
  if (rhsConst.getValue() <= 0) {
    builder.getContext()->emitError(
        loc, "division by non-positive value is not supported");
    return nullptr;
  }

  auto lhs = visit(expr.getLHS());
  auto rhs = visit(expr.getRHS());
  // Lowering an operand can itself fail: the visitors return nullptr after
  // emitting a diagnostic (e.g. for a nested division by a non-constant).
  // Propagate that failure rather than asserting, so that builds without
  // assertions never pass a null operand to the builder.
  if (!lhs || !rhs)
    return nullptr;

  Value *zeroCst = builder.create<ConstantIndexOp>(loc, 0);
  Value *oneCst = builder.create<ConstantIndexOp>(loc, 1);
  Value *nonPositive =
      builder.create<CmpIOp>(loc, CmpIPredicate::SLE, lhs, zeroCst);
  // Both candidate dividends are non-negative in the case where they are
  // selected: -a when a <= 0, and a - 1 when a > 0.
  Value *negated = builder.create<SubIOp>(loc, zeroCst, lhs);
  Value *decremented = builder.create<SubIOp>(loc, lhs, oneCst);
  Value *dividend =
      builder.create<SelectOp>(loc, nonPositive, negated, decremented);
  Value *quotient = builder.create<DivISOp>(loc, dividend, rhs);
  // Undo the dividend adjustment: negate for a <= 0, add one for a > 0.
  Value *negatedQuotient = builder.create<SubIOp>(loc, zeroCst, quotient);
  Value *incrementedQuotient = builder.create<AddIOp>(loc, quotient, oneCst);
  Value *result = builder.create<SelectOp>(loc, nonPositive, negatedQuotient,
                                           incrementedQuotient);
  return result;
}
Value *visitConstantExpr(AffineConstantExpr expr) {

View File

@ -253,3 +253,99 @@ func @cmpi() -> (i1, i1, i1, i1, i1, i1, i1, i1, i1, i1) {
%9 = cmpi "uge", %c42, %cm1 : i32
return %0, %1, %2, %3, %4, %5, %6, %7, %8, %9 : i1, i1, i1, i1, i1, i1, i1, i1, i1, i1
}
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine_apply (i) -> (i mod 42) to standard operations. Please only
// change these operations together with the affine lowering pass tests.
// --------------------------------------------------------------------------//
// CHECK-LABEL: @lowered_affine_mod
func @lowered_affine_mod() -> (index, index) {
// First case: lowered form of (-43 mod 42), expected to fold to 41.
// CHECK-NEXT: {{.*}} = constant 41 : index
%c-43 = constant -43 : index
%c42 = constant 42 : index
%0 = remis %c-43, %c42 : index
%c0 = constant 0 : index
%1 = cmpi "slt", %0, %c0 : index
%2 = addi %0, %c42 : index
%3 = select %1, %2, %0 : index
// Second case: lowered form of (43 mod 42), expected to fold to 1.
// CHECK-NEXT: {{.*}} = constant 1 : index
%c43 = constant 43 : index
%c42_0 = constant 42 : index
%4 = remis %c43, %c42_0 : index
%c0_1 = constant 0 : index
%5 = cmpi "slt", %4, %c0_1 : index
%6 = addi %4, %c42_0 : index
%7 = select %5, %6, %4 : index
return %3, %7 : index, index
}
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine_apply (i) -> (i floordiv 42) to standard operations. Please only
// change these operations together with the affine lowering pass tests.
// --------------------------------------------------------------------------//
// CHECK-LABEL: func @lowered_affine_floordiv
func @lowered_affine_floordiv() -> (index, index) {
// First case: lowered form of (-43 floordiv 42), expected to fold to -2.
// CHECK-NEXT: %c-2 = constant -2 : index
%c-43 = constant -43 : index
%c42 = constant 42 : index
%c0 = constant 0 : index
%c-1 = constant -1 : index
%0 = cmpi "slt", %c-43, %c0 : index
%1 = subi %c-1, %c-43 : index
%2 = select %0, %1, %c-43 : index
%3 = divis %2, %c42 : index
%4 = subi %c-1, %3 : index
%5 = select %0, %4, %3 : index
// Second case: lowered form of (43 floordiv 42), expected to fold to 1.
// CHECK-NEXT: %c1 = constant 1 : index
%c43 = constant 43 : index
%c42_0 = constant 42 : index
%c0_1 = constant 0 : index
%c-1_2 = constant -1 : index
%6 = cmpi "slt", %c43, %c0_1 : index
%7 = subi %c-1_2, %c43 : index
%8 = select %6, %7, %c43 : index
%9 = divis %8, %c42_0 : index
%10 = subi %c-1_2, %9 : index
%11 = select %6, %10, %9 : index
return %5, %11 : index, index
}
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: the operations in this test are exactly those produced by
// lowering affine_apply (i) -> (i ceildiv 42) to standard operations. Please only
// change these operations together with the affine lowering pass tests.
// --------------------------------------------------------------------------//
// CHECK-LABEL: func @lowered_affine_ceildiv
func @lowered_affine_ceildiv() -> (index, index) {
// First case: lowered form of (-43 ceildiv 42), expected to fold to -1.
// CHECK-NEXT: %c-1 = constant -1 : index
%c-43 = constant -43 : index
%c42 = constant 42 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = cmpi "sle", %c-43, %c0 : index
%1 = subi %c0, %c-43 : index
%2 = subi %c-43, %c1 : index
%3 = select %0, %1, %2 : index
%4 = divis %3, %c42 : index
%5 = subi %c0, %4 : index
%6 = addi %4, %c1 : index
%7 = select %0, %5, %6 : index
// Second case: lowered form of (43 ceildiv 42), expected to fold to 2.
// CHECK-NEXT: %c2 = constant 2 : index
%c43 = constant 43 : index
%c42_0 = constant 42 : index
%c0_1 = constant 0 : index
%c1_2 = constant 1 : index
%8 = cmpi "sle", %c43, %c0_1 : index
%9 = subi %c0_1, %c43 : index
%10 = subi %c43, %c1_2 : index
%11 = select %8, %9, %10 : index
%12 = divis %11, %c42_0 : index
%13 = subi %c0_1, %12 : index
%14 = addi %12, %c1_2 : index
%15 = select %8, %13, %14 : index
return %7, %15 : index, index
}

View File

@ -576,3 +576,75 @@ func @args_ret_affine_apply(index, index) -> (index, index) {
%11 = affine_apply #map1 ()[%1]
return %00, %11 : index, index
}
//===---------------------------------------------------------------------===//
// Test lowering of Euclidean (floor) division, ceil division and modulo
// operation used in affine expressions. In addition to testing the
// instruction-level output, check that the obtained results are correct by
// applying constant folding transformation after affine lowering.
//===---------------------------------------------------------------------===//
#mapmod = (i) -> (i mod 42)
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_mod
// test in the "constant-fold.mlir" test to reflect the expected output of
// affine_apply lowering.
// --------------------------------------------------------------------------//
// CHECK-LABEL: func @affine_apply_mod
func @affine_apply_mod(%arg0 : index) -> (index) {
// Expected lowering of `%arg0 mod 42`: signed remainder, then add the divisor
// back when the remainder is negative.
// CHECK-NEXT: %c42 = constant 42 : index
// CHECK-NEXT: %0 = remis %arg0, %c42 : index
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %1 = cmpi "slt", %0, %c0 : index
// CHECK-NEXT: %2 = addi %0, %c42 : index
// CHECK-NEXT: %3 = select %1, %2, %0 : index
%0 = affine_apply #mapmod (%arg0)
return %0 : index
}
#mapfloordiv = (i) -> (i floordiv 42)
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_floordiv
// test in the "constant-fold.mlir" test to reflect the expected output of
// affine_apply lowering.
// --------------------------------------------------------------------------//
// CHECK-LABEL: func @affine_apply_floordiv
func @affine_apply_floordiv(%arg0 : index) -> (index) {
// Expected lowering of `%arg0 floordiv 42`: a negative dividend is replaced
// by (-1 - dividend) before the single signed division, and the quotient is
// mapped back with (-1 - quotient).
// CHECK-NEXT: %c42 = constant 42 : index
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %c-1 = constant -1 : index
// CHECK-NEXT: %0 = cmpi "slt", %arg0, %c0 : index
// CHECK-NEXT: %1 = subi %c-1, %arg0 : index
// CHECK-NEXT: %2 = select %0, %1, %arg0 : index
// CHECK-NEXT: %3 = divis %2, %c42 : index
// CHECK-NEXT: %4 = subi %c-1, %3 : index
// CHECK-NEXT: %5 = select %0, %4, %3 : index
%0 = affine_apply #mapfloordiv (%arg0)
return %0 : index
}
#mapceildiv = (i) -> (i ceildiv 42)
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_ceildiv
// test in the "constant-fold.mlir" test to reflect the expected output of
// affine_apply lowering.
// --------------------------------------------------------------------------//
// CHECK-LABEL: func @affine_apply_ceildiv
func @affine_apply_ceildiv(%arg0 : index) -> (index) {
// Expected lowering of `%arg0 ceildiv 42`: the dividend is (0 - dividend)
// when non-positive and (dividend - 1) otherwise; after the single signed
// division the quotient is negated or incremented accordingly.
// CHECK-NEXT: %c42 = constant 42 : index
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %c1 = constant 1 : index
// CHECK-NEXT: %0 = cmpi "sle", %arg0, %c0 : index
// CHECK-NEXT: %1 = subi %c0, %arg0 : index
// CHECK-NEXT: %2 = subi %arg0, %c1 : index
// CHECK-NEXT: %3 = select %0, %1, %2 : index
// CHECK-NEXT: %4 = divis %3, %c42 : index
// CHECK-NEXT: %5 = subi %c0, %4 : index
// CHECK-NEXT: %6 = addi %4, %c1 : index
// CHECK-NEXT: %7 = select %0, %5, %6 : index
%0 = affine_apply #mapceildiv (%arg0)
return %0 : index
}