[mlir] Support masked N-D vector transfer ops in ProgressiveVectorToSCF.
Mask vectors are handled similarly to data vectors in the N-D TransferWriteOp lowering: they are copied into a temporary memory buffer, which can be indexed with non-constant values.

Differential Revision: https://reviews.llvm.org/D101136
commit 64f7fb5dfc
parent c623945d70
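To make the buffering scheme concrete, here is a rough hand-written MLIR sketch of what the mask handling boils down to for a vector<4x9xi1> mask. It is hypothetical IR (function and SSA names are invented; this is not the literal output of the pass): the mask is stored into a rank-0 buffer, one vector dimension is unpacked into a memref dimension, and each loop iteration loads its 1-D sub-mask with the non-constant induction variable.

func @illustrate_mask_buffering(%mask : vector<4x9xi1>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c4 = constant 4 : index

  // Copy the mask vector into a temporary rank-0 buffer.
  %mask_buf = memref.alloca() : memref<vector<4x9xi1>>
  memref.store %mask, %mask_buf[] : memref<vector<4x9xi1>>

  // Unpack one vector dimension into a memref dimension so the buffer can be
  // indexed with a non-constant value.
  %mask_2d = vector.type_cast %mask_buf
      : memref<vector<4x9xi1>> to memref<4xvector<9xi1>>

  scf.for %i = %c0 to %c4 step %c1 {
    // Load the 1-D sub-mask for this iteration using the induction variable.
    %sub_mask = memref.load %mask_2d[%i] : memref<4xvector<9xi1>>
    // %sub_mask would become the mask operand of the rank-reduced
    // vector.transfer op generated for this slice.
  }
  return
}

The data vector goes through the same store/cast/load sequence; the changes below wire the per-iteration sub-mask into the mask operand of the newly created transfer op.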
@@ -56,16 +56,34 @@ static MemRefType unpackOneDim(MemRefType type) {
                                          vectorType.getElementType()));
 }
 
-// TODO: Parallelism and threadlocal considerations.
-static Value setAllocAtFunctionEntry(MemRefType type, Operation *op) {
+/// Helper data structure for data and mask buffers.
+struct BufferAllocs {
+  Value dataBuffer;
+  Value maskBuffer;
+};
+
+/// Allocate temporary buffers for data (vector) and mask (if present).
+/// TODO: Parallelism and threadlocal considerations.
+template <typename OpTy>
+static BufferAllocs allocBuffers(OpTy xferOp) {
   auto &b = ScopedContext::getBuilderRef();
   OpBuilder::InsertionGuard guard(b);
   Operation *scope =
-      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+      xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
   assert(scope && "Expected op to be inside automatic allocation scope");
   b.setInsertionPointToStart(&scope->getRegion(0).front());
-  Value res = memref_alloca(type);
-  return res;
+
+  BufferAllocs result;
+  auto bufferType = MemRefType::get({}, xferOp.getVectorType());
+  result.dataBuffer = memref_alloca(bufferType).value;
+
+  if (xferOp.mask()) {
+    auto maskType = MemRefType::get({}, xferOp.mask().getType());
+    result.maskBuffer = memref_alloca(maskType).value;
+    memref_store(xferOp.mask(), result.maskBuffer);
+  }
+
+  return result;
 }
 
 /// Given a vector transfer op, calculate which dimension of the `source`
@@ -238,6 +256,16 @@ static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
   return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
 }
 
+/// Given a transfer op, find the memref from which the mask is loaded. This
+/// is similar to Strategy<TransferWriteOp>::getBuffer.
+template <typename OpTy>
+static Value getMaskBuffer(OpTy xferOp) {
+  assert(xferOp.mask() && "Expected that transfer op has mask");
+  auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
+  assert(loadOp && "Expected transfer op mask produced by LoadOp");
+  return loadOp.getMemRef();
+}
+
 /// Codegen strategy, depending on the operation.
 template <typename OpTy>
 struct Strategy;
@@ -266,9 +294,9 @@ struct Strategy<TransferReadOp> {
     return getStoreOp(xferOp).getMemRef();
   }
 
-  /// Retrieve the indices of the current StoreOp.
-  static void getStoreIndices(TransferReadOp xferOp,
+  /// Retrieve the indices of the current StoreOp that stores into the buffer.
+  static void getBufferIndices(TransferReadOp xferOp,
                               SmallVector<Value, 8> &indices) {
     auto storeOp = getStoreOp(xferOp);
     auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
     indices.append(prevIndices.begin(), prevIndices.end());
@@ -300,10 +328,11 @@ struct Strategy<TransferReadOp> {
   ///
   /// Note: The loop and type cast are generated in TransferOpConversion.
   /// The original TransferReadOp and store op are deleted in `cleanup`.
-  static void rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
-                        Value buffer, Value iv) {
+  /// Note: The `mask` operand is set in TransferOpConversion.
+  static TransferReadOp rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
+                                  Value buffer, Value iv) {
     SmallVector<Value, 8> storeIndices;
-    getStoreIndices(xferOp, storeIndices);
+    getBufferIndices(xferOp, storeIndices);
     storeIndices.push_back(iv);
 
     SmallVector<Value, 8> xferIndices;
@@ -321,6 +350,7 @@ struct Strategy<TransferReadOp> {
     newXfer.getDefiningOp()->setAttr(kPassLabel, builder.getUnitAttr());
 
     memref_store(newXfer, buffer, storeIndices);
+    return newXfer.getDefiningOp<TransferReadOp>();
   }
 
   /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
@@ -329,7 +359,7 @@ struct Strategy<TransferReadOp> {
       OpBuilder &/*builder*/, TransferReadOp xferOp, Value buffer,
       Value iv) {
     SmallVector<Value, 8> storeIndices;
-    getStoreIndices(xferOp, storeIndices);
+    getBufferIndices(xferOp, storeIndices);
     storeIndices.push_back(iv);
 
     auto bufferType = buffer.getType().dyn_cast<ShapedType>();
@@ -361,9 +391,9 @@ struct Strategy<TransferWriteOp> {
     return loadOp.getMemRef();
   }
 
-  /// Retrieve the indices of the current LoadOp.
-  static void getLoadIndices(TransferWriteOp xferOp,
+  /// Retrieve the indices of the current LoadOp that loads from the buffer.
+  static void getBufferIndices(TransferWriteOp xferOp,
                              SmallVector<Value, 8> &indices) {
     auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
     auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
     indices.append(prevIndices.begin(), prevIndices.end());
@@ -378,10 +408,10 @@ struct Strategy<TransferWriteOp> {
   /// to memory.
   ///
   /// Note: For more details, see comments on Strategy<TransferReadOp>.
-  static void rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
-                        Value buffer, Value iv) {
+  static TransferWriteOp rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
+                                   Value buffer, Value iv) {
     SmallVector<Value, 8> loadIndices;
-    getLoadIndices(xferOp, loadIndices);
+    getBufferIndices(xferOp, loadIndices);
     loadIndices.push_back(iv);
 
     SmallVector<Value, 8> xferIndices;
@@ -397,6 +427,8 @@ struct Strategy<TransferWriteOp> {
 
     if (vecType.getRank() > kTargetRank)
       newXfer.op->setAttr(kPassLabel, builder.getUnitAttr());
+
+    return newXfer;
   }
 
   /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
@@ -416,8 +448,6 @@ LogicalResult checkPrepareXferOp(OpTy xferOp) {
     return failure();
   if (xferOp.getVectorType().getRank() <= kTargetRank)
     return failure();
-  if (xferOp.mask())
-    return failure();
   return success();
 }
 
@@ -442,6 +472,8 @@ LogicalResult checkPrepareXferOp(OpTy xferOp) {
 ///   memref.store %1, %0[] : memref<vector<5x4xf32>>
 ///   %vec = memref.load %0[] : memref<vector<5x4xf32>>
 /// ```
+///
+/// Note: A second temporary buffer may be allocated for the `mask` operand.
 struct PrepareTransferReadConversion
     : public OpRewritePattern<TransferReadOp> {
   using OpRewritePattern<TransferReadOp>::OpRewritePattern;
@@ -452,12 +484,16 @@ struct PrepareTransferReadConversion
       return failure();
 
     ScopedContext scope(rewriter, xferOp.getLoc());
-    auto allocType = MemRefType::get({}, xferOp.getVectorType());
-    auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
+    auto buffers = allocBuffers(xferOp);
     auto *newXfer = rewriter.clone(*xferOp.getOperation());
     newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
-    memref_store(newXfer->getResult(0), buffer);
-    rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffer);
+    if (xferOp.mask()) {
+      auto loadedMask = memref_load(buffers.maskBuffer);
+      dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(loadedMask);
+    }
+
+    memref_store(newXfer->getResult(0), buffers.dataBuffer);
+    rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
 
     return success();
   }
@@ -484,6 +520,8 @@ struct PrepareTransferReadConversion
 ///   vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
 ///       : vector<5x4xf32>, memref<?x?x?xf32>
 /// ```
+///
+/// Note: A second temporary buffer may be allocated for the `mask` operand.
 struct PrepareTransferWriteConversion
     : public OpRewritePattern<TransferWriteOp> {
   using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
@@ -494,16 +532,20 @@ struct PrepareTransferWriteConversion
       return failure();
 
     ScopedContext scope(rewriter, xferOp.getLoc());
-    auto allocType = MemRefType::get({}, xferOp.getVectorType());
-    auto buffer = setAllocAtFunctionEntry(allocType, xferOp);
-    memref_store(xferOp.vector(), buffer);
-    auto loadedVec = memref_load(buffer);
-
+    auto buffers = allocBuffers(xferOp);
+    memref_store(xferOp.vector(), buffers.dataBuffer);
+    auto loadedVec = memref_load(buffers.dataBuffer);
     rewriter.updateRootInPlace(xferOp, [&]() {
       xferOp.vectorMutable().assign(loadedVec);
       xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
     });
 
+    if (xferOp.mask()) {
+      auto loadedMask = memref_load(buffers.maskBuffer);
+      rewriter.updateRootInPlace(
+          xferOp, [&]() { xferOp.maskMutable().assign(loadedMask); });
+    }
+
     return success();
   }
 };
@@ -535,16 +577,28 @@ struct TransferOpConversion : public OpRewritePattern<OpTy> {
       return failure();
 
     ScopedContext scope(rewriter, xferOp.getLoc());
-    // How the buffer can be found depends on OpTy.
-    auto buffer = Strategy<OpTy>::getBuffer(xferOp);
-    auto bufferType = buffer.getType().template dyn_cast<MemRefType>();
-    auto castedType = unpackOneDim(bufferType);
-    auto casted = vector_type_cast(castedType, buffer);
+    // Find and cast data buffer. How the buffer can be found depends on OpTy.
+    auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
+    auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
+    auto castedDataType = unpackOneDim(dataBufferType);
+    auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
+
+    // If the xferOp has a mask: Find and cast mask buffer.
+    Value castedMaskBuffer;
+    if (xferOp.mask()) {
+      auto maskBuffer = getMaskBuffer(xferOp);
+      auto maskBufferType =
+          maskBuffer.getType().template dyn_cast<MemRefType>();
+      auto castedMaskType = unpackOneDim(maskBufferType);
+      castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
+    }
 
     // Loop bounds and step.
     auto lb = std_constant_index(0).value;
     auto ub = std_constant_index(
-        castedType.getDimSize(castedType.getRank() - 1)).value;
+                  castedDataType.getDimSize(castedDataType.getRank() - 1))
+                  .value;
     auto step = std_constant_index(1).value;
 
     // Generate for loop.
@@ -555,11 +609,31 @@ struct TransferOpConversion : public OpRewritePattern<OpTy> {
       ScopedContext scope(b, loc);
       generateInBoundsCheck(
          xferOp, iv, b, unpackedDim(xferOp),
-          /*inBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
-            Strategy<OpTy>::rewriteOp(b, xferOp, casted, iv);
-          }, /*outOfBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
-            Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, casted, iv);
-          });
+          /*inBoundsCase=*/
+          [&](OpBuilder &b, Location /*loc*/) {
+            // Create new transfer op.
+            OpTy newXfer =
+                Strategy<OpTy>::rewriteOp(b, xferOp, castedDataBuffer, iv);
+
+            // If old transfer op has a mask: Set mask on new transfer op.
+            if (xferOp.mask()) {
+              OpBuilder::InsertionGuard guard(b);
+              b.setInsertionPoint(newXfer); // Insert load before newXfer.
+
+              SmallVector<Value, 8> loadIndices;
+              Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
+              loadIndices.push_back(iv);
+
+              auto mask = memref_load(castedMaskBuffer, loadIndices);
+              rewriter.updateRootInPlace(
+                  newXfer, [&]() { newXfer.maskMutable().assign(mask); });
+            }
+          },
+          /*outOfBoundsCase=*/
+          [&](OpBuilder &b, Location /*loc*/) {
+            Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, castedDataBuffer,
+                                                 iv);
+          });
       b.create<scf::YieldOp>(loc);
     });
 
@@ -1,8 +1,3 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
-// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
-// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
 // RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
@@ -17,6 +12,19 @@ func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   return
 }
 
+func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
+  %fm42 = constant -42.0: f32
+  %mask = constant dense<[[1, 0, 1, 0, 1, 1, 1, 0, 1],
+                          [0, 0, 1, 1, 1, 1, 1, 0, 1],
+                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
+                          [0, 0, 1, 0, 1, 1, 1, 0, 1]]> : vector<4x9xi1>
+  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
+      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :
+    memref<?x?xf32>, vector<4x9xf32>
+  vector.print %f: vector<4x9xf32>
+  return
+}
+
 func @transfer_read_2d_transposed(
     %A : memref<?x?xf32>, %base1: index, %base2: index) {
   %fm42 = constant -42.0: f32
@@ -80,7 +88,10 @@ func @entry() {
   call @transfer_write_2d(%A, %c3, %c1) : (memref<?x?xf32>, index, index) -> ()
   // Read shifted by 0 and pad with -42:
   call @transfer_read_2d(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
-  // Same as above, but transposed
+  // Same as above, but apply a mask
+  call @transfer_read_2d_mask(%A, %c0, %c0)
+      : (memref<?x?xf32>, index, index) -> ()
+  // Same as above, but without mask and transposed
   call @transfer_read_2d_transposed(%A, %c0, %c0)
       : (memref<?x?xf32>, index, index) -> ()
   // Second vector dimension is a broadcast
@@ -92,5 +103,6 @@ func @entry() {
 // CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
 // CHECK: ( ( 12, 22, -42, -42, -42, -42, -42, -42, -42 ), ( 13, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
 // CHECK: ( ( 0, 1, 2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
+// CHECK: ( ( 0, -42, 2, -42, -42, -42, -42, -42, -42 ), ( -42, -42, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
 // CHECK: ( ( 0, 10, 20, -42, -42, -42, -42, -42, -42 ), ( 1, 11, 21, -42, -42, -42, -42, -42, -42 ), ( 2, 12, 22, -42, -42, -42, -42, -42, -42 ), ( 3, 13, 23, -42, -42, -42, -42, -42, -42 ) )
 // CHECK: ( ( 12, 12, 12, 12, 12, 12, 12, 12, 12 ), ( 13, 13, 13, 13, 13, 13, 13, 13, 13 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )