forked from OSchip/llvm-project
[mlir] Support masked N-D vector transfer ops in ProgressiveVectorToSCF.
Mask vectors are handled similarly to data vectors in N-D TransferWriteOp. They are copied into a temporary memory buffer, which can be indexed into with non-constant values. Differential Revision: https://reviews.llvm.org/D101136
This commit is contained in:
parent
c623945d70
commit
64f7fb5dfc
|
@ -56,16 +56,34 @@ static MemRefType unpackOneDim(MemRefType type) {
|
|||
vectorType.getElementType()));
|
||||
}
|
||||
|
||||
// TODO: Parallelism and threadlocal considerations.
|
||||
static Value setAllocAtFunctionEntry(MemRefType type, Operation *op) {
|
||||
/// Helper data structure for data and mask buffers.
/// Returned by `allocBuffers`, which fills both fields with the results of
/// `memref_alloca` calls.
|
||||
struct BufferAllocs {
|
||||
// Rank-0 memref whose element type is the transfer op's vector type.
Value dataBuffer;
|
||||
// Rank-0 memref whose element type is the mask's type. Only assigned when
// the transfer op has a mask; otherwise left default-constructed.
Value maskBuffer;
|
||||
};
|
||||
|
||||
/// Allocate temporary buffers for data (vector) and mask (if present).
|
||||
/// TODO: Parallelism and threadlocal considerations.
|
||||
template <typename OpTy>
|
||||
static BufferAllocs allocBuffers(OpTy xferOp) {
|
||||
auto &b = ScopedContext::getBuilderRef();
|
||||
OpBuilder::InsertionGuard guard(b);
|
||||
Operation *scope =
|
||||
op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
|
||||
xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
|
||||
assert(scope && "Expected op to be inside automatic allocation scope");
|
||||
b.setInsertionPointToStart(&scope->getRegion(0).front());
|
||||
Value res = memref_alloca(type);
|
||||
return res;
|
||||
|
||||
BufferAllocs result;
|
||||
auto bufferType = MemRefType::get({}, xferOp.getVectorType());
|
||||
result.dataBuffer = memref_alloca(bufferType).value;
|
||||
|
||||
if (xferOp.mask()) {
|
||||
auto maskType = MemRefType::get({}, xferOp.mask().getType());
|
||||
result.maskBuffer = memref_alloca(maskType).value;
|
||||
memref_store(xferOp.mask(), result.maskBuffer);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Given a vector transfer op, calculate which dimension of the `source`
|
||||
|
@ -238,6 +256,16 @@ static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
|
|||
return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
|
||||
}
|
||||
|
||||
/// Given a transfer op, find the memref from which the mask is loaded. This
|
||||
/// is similar to Strategy<TransferWriteOp>::getBuffer.
///
/// Preconditions (checked with asserts only): `xferOp` has a mask, and that
/// mask is defined directly by a `memref::LoadOp`.
/// Returns the memref operand of that load.
|
||||
template <typename OpTy>
|
||||
static Value getMaskBuffer(OpTy xferOp) {
|
||||
assert(xferOp.mask() && "Expected that transfer op has mask");
|
||||
// The mask is expected to be the result of a load from the mask buffer.
auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
|
||||
assert(loadOp && "Expected transfer op mask produced by LoadOp");
|
||||
return loadOp.getMemRef();
|
||||
}
|
||||
|
||||
/// Codegen strategy, depending on the operation.
|
||||
template <typename OpTy>
|
||||
struct Strategy;
|
||||
|
@ -266,9 +294,9 @@ struct Strategy<TransferReadOp> {
|
|||
return getStoreOp(xferOp).getMemRef();
|
||||
}
|
||||
|
||||
/// Retrieve the indices of the current StoreOp.
|
||||
static void getStoreIndices(TransferReadOp xferOp,
|
||||
SmallVector<Value, 8> &indices) {
|
||||
/// Retrieve the indices of the current StoreOp that stores into the buffer.
|
||||
static void getBufferIndices(TransferReadOp xferOp,
|
||||
SmallVector<Value, 8> &indices) {
|
||||
auto storeOp = getStoreOp(xferOp);
|
||||
auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
|
||||
indices.append(prevIndices.begin(), prevIndices.end());
|
||||
|
@ -300,10 +328,11 @@ struct Strategy<TransferReadOp> {
|
|||
///
|
||||
/// Note: The loop and type cast are generated in TransferOpConversion.
|
||||
/// The original TransferReadOp and store op are deleted in `cleanup`.
|
||||
static void rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
|
||||
Value buffer, Value iv) {
|
||||
/// Note: The `mask` operand is set in TransferOpConversion.
|
||||
static TransferReadOp rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
|
||||
Value buffer, Value iv) {
|
||||
SmallVector<Value, 8> storeIndices;
|
||||
getStoreIndices(xferOp, storeIndices);
|
||||
getBufferIndices(xferOp, storeIndices);
|
||||
storeIndices.push_back(iv);
|
||||
|
||||
SmallVector<Value, 8> xferIndices;
|
||||
|
@ -321,6 +350,7 @@ struct Strategy<TransferReadOp> {
|
|||
newXfer.getDefiningOp()->setAttr(kPassLabel, builder.getUnitAttr());
|
||||
|
||||
memref_store(newXfer, buffer, storeIndices);
|
||||
return newXfer.getDefiningOp<TransferReadOp>();
|
||||
}
|
||||
|
||||
/// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
|
||||
|
@ -329,7 +359,7 @@ struct Strategy<TransferReadOp> {
|
|||
OpBuilder &/*builder*/, TransferReadOp xferOp, Value buffer,
|
||||
Value iv) {
|
||||
SmallVector<Value, 8> storeIndices;
|
||||
getStoreIndices(xferOp, storeIndices);
|
||||
getBufferIndices(xferOp, storeIndices);
|
||||
storeIndices.push_back(iv);
|
||||
|
||||
auto bufferType = buffer.getType().dyn_cast<ShapedType>();
|
||||
|
@ -361,9 +391,9 @@ struct Strategy<TransferWriteOp> {
|
|||
return loadOp.getMemRef();
|
||||
}
|
||||
|
||||
/// Retrieve the indices of the current LoadOp.
|
||||
static void getLoadIndices(TransferWriteOp xferOp,
|
||||
SmallVector<Value, 8> &indices) {
|
||||
/// Retrieve the indices of the current LoadOp that loads from the buffer.
|
||||
static void getBufferIndices(TransferWriteOp xferOp,
|
||||
SmallVector<Value, 8> &indices) {
|
||||
auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
|
||||
auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
|
||||
indices.append(prevIndices.begin(), prevIndices.end());
|
||||
|
@ -378,10 +408,10 @@ struct Strategy<TransferWriteOp> {
|
|||
/// to memory.
|
||||
///
|
||||
/// Note: For more details, see comments on Strategy<TransferReadOp>.
|
||||
static void rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
|
||||
Value buffer, Value iv) {
|
||||
static TransferWriteOp rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
|
||||
Value buffer, Value iv) {
|
||||
SmallVector<Value, 8> loadIndices;
|
||||
getLoadIndices(xferOp, loadIndices);
|
||||
getBufferIndices(xferOp, loadIndices);
|
||||
loadIndices.push_back(iv);
|
||||
|
||||
SmallVector<Value, 8> xferIndices;
|
||||
|
@ -397,6 +427,8 @@ struct Strategy<TransferWriteOp> {
|
|||
|
||||
if (vecType.getRank() > kTargetRank)
|
||||
newXfer.op->setAttr(kPassLabel, builder.getUnitAttr());
|
||||
|
||||
return newXfer;
|
||||
}
|
||||
|
||||
/// Handle out-of-bounds accesses on the to-be-unpacked dimension.
|
||||
|
@ -416,8 +448,6 @@ LogicalResult checkPrepareXferOp(OpTy xferOp) {
|
|||
return failure();
|
||||
if (xferOp.getVectorType().getRank() <= kTargetRank)
|
||||
return failure();
|
||||
if (xferOp.mask())
|
||||
return failure();
|
||||
return success();
|
||||
}
|
||||
|
||||
|
@ -442,6 +472,8 @@ LogicalResult checkPrepareXferOp(OpTy xferOp) {
|
|||
/// memref.store %1, %0[] : memref<vector<5x4xf32>>
|
||||
/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
|
||||
/// ```
|
||||
///
|
||||
/// Note: A second temporary buffer may be allocated for the `mask` operand.
|
||||
struct PrepareTransferReadConversion
|
||||
: public OpRewritePattern<TransferReadOp> {
|
||||
using OpRewritePattern<TransferReadOp>::OpRewritePattern;
|
||||
|
@ -452,12 +484,16 @@ struct PrepareTransferReadConversion
|
|||
return failure();
|
||||
|
||||
ScopedContext scope(rewriter, xferOp.getLoc());
|
||||
auto allocType = MemRefType::get({}, xferOp.getVectorType());
|
||||
auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
|
||||
auto buffers = allocBuffers(xferOp);
|
||||
auto *newXfer = rewriter.clone(*xferOp.getOperation());
|
||||
newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
|
||||
memref_store(newXfer->getResult(0), buffer);
|
||||
rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffer);
|
||||
if (xferOp.mask()) {
|
||||
auto loadedMask = memref_load(buffers.maskBuffer);
|
||||
dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(loadedMask);
|
||||
}
|
||||
|
||||
memref_store(newXfer->getResult(0), buffers.dataBuffer);
|
||||
rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
|
||||
|
||||
return success();
|
||||
}
|
||||
|
@ -484,6 +520,8 @@ struct PrepareTransferReadConversion
|
|||
/// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
|
||||
/// : vector<5x4xf32>, memref<?x?x?xf32>
|
||||
/// ```
|
||||
///
|
||||
/// Note: A second temporary buffer may be allocated for the `mask` operand.
|
||||
struct PrepareTransferWriteConversion
|
||||
: public OpRewritePattern<TransferWriteOp> {
|
||||
using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
|
||||
|
@ -494,16 +532,20 @@ struct PrepareTransferWriteConversion
|
|||
return failure();
|
||||
|
||||
ScopedContext scope(rewriter, xferOp.getLoc());
|
||||
auto allocType = MemRefType::get({}, xferOp.getVectorType());
|
||||
auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
|
||||
memref_store(xferOp.vector(), buffer);
|
||||
auto loadedVec = memref_load(buffer);
|
||||
|
||||
auto buffers = allocBuffers(xferOp);
|
||||
memref_store(xferOp.vector(), buffers.dataBuffer);
|
||||
auto loadedVec = memref_load(buffers.dataBuffer);
|
||||
rewriter.updateRootInPlace(xferOp, [&]() {
|
||||
xferOp.vectorMutable().assign(loadedVec);
|
||||
xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
|
||||
});
|
||||
|
||||
if (xferOp.mask()) {
|
||||
auto loadedMask = memref_load(buffers.maskBuffer);
|
||||
rewriter.updateRootInPlace(
|
||||
xferOp, [&]() { xferOp.maskMutable().assign(loadedMask); });
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
@ -535,16 +577,28 @@ struct TransferOpConversion : public OpRewritePattern<OpTy> {
|
|||
return failure();
|
||||
|
||||
ScopedContext scope(rewriter, xferOp.getLoc());
|
||||
// How the buffer can be found depends on OpTy.
|
||||
auto buffer = Strategy<OpTy>::getBuffer(xferOp);
|
||||
auto bufferType = buffer.getType().template dyn_cast<MemRefType>();
|
||||
auto castedType = unpackOneDim(bufferType);
|
||||
auto casted = vector_type_cast(castedType, buffer);
|
||||
|
||||
// Find and cast data buffer. How the buffer can be found depends on OpTy.
|
||||
auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
|
||||
auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
|
||||
auto castedDataType = unpackOneDim(dataBufferType);
|
||||
auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
|
||||
|
||||
// If the xferOp has a mask: Find and cast mask buffer.
|
||||
Value castedMaskBuffer;
|
||||
if (xferOp.mask()) {
|
||||
auto maskBuffer = getMaskBuffer(xferOp);
|
||||
auto maskBufferType =
|
||||
maskBuffer.getType().template dyn_cast<MemRefType>();
|
||||
auto castedMaskType = unpackOneDim(maskBufferType);
|
||||
castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
|
||||
}
|
||||
|
||||
// Loop bounds and step.
|
||||
auto lb = std_constant_index(0).value;
|
||||
auto ub = std_constant_index(
|
||||
castedType.getDimSize(castedType.getRank() - 1)).value;
|
||||
castedDataType.getDimSize(castedDataType.getRank() - 1))
|
||||
.value;
|
||||
auto step = std_constant_index(1).value;
|
||||
|
||||
// Generate for loop.
|
||||
|
@ -555,11 +609,31 @@ struct TransferOpConversion : public OpRewritePattern<OpTy> {
|
|||
ScopedContext scope(b, loc);
|
||||
generateInBoundsCheck(
|
||||
xferOp, iv, b, unpackedDim(xferOp),
|
||||
/*inBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
|
||||
Strategy<OpTy>::rewriteOp(b, xferOp, casted, iv);
|
||||
}, /*outOfBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
|
||||
Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, casted, iv);
|
||||
});
|
||||
/*inBoundsCase=*/
|
||||
[&](OpBuilder &b, Location /*loc*/) {
|
||||
// Create new transfer op.
|
||||
OpTy newXfer =
|
||||
Strategy<OpTy>::rewriteOp(b, xferOp, castedDataBuffer, iv);
|
||||
|
||||
// If old transfer op has a mask: Set mask on new transfer op.
|
||||
if (xferOp.mask()) {
|
||||
OpBuilder::InsertionGuard guard(b);
|
||||
b.setInsertionPoint(newXfer); // Insert load before newXfer.
|
||||
|
||||
SmallVector<Value, 8> loadIndices;
|
||||
Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
|
||||
loadIndices.push_back(iv);
|
||||
|
||||
auto mask = memref_load(castedMaskBuffer, loadIndices);
|
||||
rewriter.updateRootInPlace(
|
||||
newXfer, [&]() { newXfer.maskMutable().assign(mask); });
|
||||
}
|
||||
},
|
||||
/*outOfBoundsCase=*/
|
||||
[&](OpBuilder &b, Location /*loc*/) {
|
||||
Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, castedDataBuffer,
|
||||
iv);
|
||||
});
|
||||
b.create<scf::YieldOp>(loc);
|
||||
});
|
||||
|
||||
|
|
|
@ -1,8 +1,3 @@
|
|||
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
|
||||
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
|
||||
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
|
||||
|
@ -17,6 +12,19 @@ func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
|
|||
return
|
||||
}
|
||||
|
||||
// Reads a vector<4x9xf32> from %A starting at [%base1, %base2] with an
// identity permutation map, passing %fm42 (-42.0) as the padding value and a
// constant vector<4x9xi1> as the mask operand, then prints the result.
func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
|
||||
%fm42 = constant -42.0: f32
|
||||
%mask = constant dense<[[1, 0, 1, 0, 1, 1, 1, 0, 1],
|
||||
[0, 0, 1, 1, 1, 1, 1, 0, 1],
|
||||
[1, 1, 1, 1, 1, 1, 1, 0, 1],
|
||||
[0, 0, 1, 0, 1, 1, 1, 0, 1]]> : vector<4x9xi1>
|
||||
%f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
|
||||
{permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :
|
||||
memref<?x?xf32>, vector<4x9xf32>
|
||||
vector.print %f: vector<4x9xf32>
|
||||
return
|
||||
}
|
||||
|
||||
func @transfer_read_2d_transposed(
|
||||
%A : memref<?x?xf32>, %base1: index, %base2: index) {
|
||||
%fm42 = constant -42.0: f32
|
||||
|
@ -80,7 +88,10 @@ func @entry() {
|
|||
call @transfer_write_2d(%A, %c3, %c1) : (memref<?x?xf32>, index, index) -> ()
|
||||
// Read shifted by 0 and pad with -42:
|
||||
call @transfer_read_2d(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
|
||||
// Same as above, but transposed
|
||||
// Same as above, but apply a mask
|
||||
call @transfer_read_2d_mask(%A, %c0, %c0)
|
||||
: (memref<?x?xf32>, index, index) -> ()
|
||||
// Same as above, but without mask and transposed
|
||||
call @transfer_read_2d_transposed(%A, %c0, %c0)
|
||||
: (memref<?x?xf32>, index, index) -> ()
|
||||
// Second vector dimension is a broadcast
|
||||
|
@ -92,5 +103,6 @@ func @entry() {
|
|||
// CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
|
||||
// CHECK: ( ( 12, 22, -42, -42, -42, -42, -42, -42, -42 ), ( 13, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
|
||||
// CHECK: ( ( 0, 1, 2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
|
||||
// CHECK: ( ( 0, -42, 2, -42, -42, -42, -42, -42, -42 ), ( -42, -42, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
|
||||
// CHECK: ( ( 0, 10, 20, -42, -42, -42, -42, -42, -42 ), ( 1, 11, 21, -42, -42, -42, -42, -42, -42 ), ( 2, 12, 22, -42, -42, -42, -42, -42, -42 ), ( 3, 13, 23, -42, -42, -42, -42, -42, -42 ) )
|
||||
// CHECK: ( ( 12, 12, 12, 12, 12, 12, 12, 12, 12 ), ( 13, 13, 13, 13, 13, 13, 13, 13, 13 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
|
||||
|
|
Loading…
Reference in New Issue