[mlir][VectorOps] Redo the scalar loop emission in VectorToSCF to pad instead of clipping

This replaces the select chain for edge-padding with an scf.if that performs the memory operation when the index is in bounds and uses the pad value when it is not. For transfer_write the same mechanism is used, except that the store is simply skipped when the index is out of bounds. The integration test exercises the cases I expect this to handle.

Differential Revision: https://reviews.llvm.org/D87241
This commit is contained in:
parent 67b37f571c
commit 239eff502b
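As a quick illustration of the change (a hand-written sketch, not taken from the patch; %A, %i and %pad are invented names): the old lowering clamped the index into bounds with a select chain and always touched memory, whereas the new lowering guards the scalar access with scf.if and falls back to the transfer's padding value.

```mlir
// Old style (schematic): clamp the index, then load unconditionally.
//   %lt   = cmpi "slt", %i, %d0 : index
//   %dm1  = subi %d0, %c1 : index
//   %clip = select %lt, %i, %dm1 : index
//   %v    = load %A[%clip] : memref<?xf32>
//
// New style: guard the access and yield the padding value when out of bounds.
func @pad_element_sketch(%A: memref<?xf32>, %i: index, %pad: f32) -> f32 {
  %c0 = constant 0 : index
  %d0 = dim %A, %c0 : memref<?xf32>
  %inb = cmpi "slt", %i, %d0 : index
  %r = scf.if %inb -> (f32) {
    %v = load %A[%i] : memref<?xf32>
    scf.yield %v : f32
  } else {
    scf.yield %pad : f32
  }
  return %r : f32
}
```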
@@ -4,6 +4,7 @@
 // RUN: FileCheck %s
 
 #map0 = affine_map<(d0, d1) -> (d1, d0)>
+#map1 = affine_map<(d0, d1) -> (d1)>
 
 func @print_memref_f32(memref<*xf32>)
 
@@ -29,6 +30,7 @@ func @main() {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c2 = constant 2 : index
+  %c3 = constant 3 : index
   %c6 = constant 6 : index
   %cst = constant -4.2e+01 : f32
   %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref<?x?xf32>
@@ -76,6 +78,28 @@ func @main() {
   // CHECK-SAME: ( 205, 305, 405, 505, 504 ),
   // CHECK-SAME: ( 105, 205, 305, 405, 505 ) )
 
+  %3 = vector.transfer_read %0[%c2, %c3], %cst : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %3 : vector<5x5xf32>
+  // New 5x5 block rooted @{2, 3} in memory.
+  // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ),
+  // CHECK-SAME: ( 404, 504, 503, -42, -42 ),
+  // CHECK-SAME: ( 405, 505, 504, -42, -42 ),
+  // CHECK-SAME: ( 305, 405, 505, -42, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )
+
+  %4 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map0} : memref<?x?xf32>, vector<5x5xf32>
+  vector.print %4 : vector<5x5xf32>
+  // Transposed 5x5 block rooted @{2, 3} in memory.
+  // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ),
+  // CHECK-SAME: ( 503, 504, 505, 405, -42 ),
+  // CHECK-SAME: ( 502, 503, 504, 505, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ),
+  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )
+
+  %5 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map1} : memref<?x?xf32>, vector<5xf32>
+  vector.print %5 : vector<5xf32>
+  // CHECK-NEXT: ( 403, 503, 502, -42, -42 )
+
   dealloc %0 : memref<?x?xf32>
   return
 }
@@ -1096,7 +1096,7 @@ static bool isContiguous(MemRefType memRefType,
                          SmallVectorImpl<int64_t> &strides) {
   int64_t offset;
   auto successStrides = getStridesAndOffset(memRefType, strides, offset);
-  bool isContiguous = (strides.back() == 1);
+  bool isContiguous = strides.empty() || strides.back() == 1;
   if (isContiguous) {
     auto sizes = memRefType.getShape();
     for (int index = 0, e = strides.size() - 2; index < e; ++index) {
@@ -111,15 +111,6 @@ private:
   template <typename Lambda>
   void emitLoops(Lambda loopBodyBuilder);
 
-  /// Operate within the body of `emitLoops` to:
-  ///   1. Compute the indexings `majorIvs + majorOffsets` and save them in
-  ///      `majorIvsPlusOffsets`.
-  ///   2. Return a boolean that determines whether the first `majorIvs.rank()`
-  ///      dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
-  Value emitInBoundsCondition(ValueRange majorIvs, ValueRange majorOffsets,
-                              MemRefBoundsCapture &memrefBounds,
-                              SmallVectorImpl<Value> &majorIvsPlusOffsets);
-
   /// Common state to lower vector transfer ops.
   PatternRewriter &rewriter;
   const VectorTransferToSCFOptions &options;
@@ -196,11 +187,16 @@ static Value onTheFlyFoldSLT(Value v, Value ub) {
   return slt(v, ub);
 }
 
-template <typename ConcreteOp>
-Value NDTransferOpHelper<ConcreteOp>::emitInBoundsCondition(
-    ValueRange majorIvs, ValueRange majorOffsets,
-    MemRefBoundsCapture &memrefBounds,
-    SmallVectorImpl<Value> &majorIvsPlusOffsets) {
+/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in
+///    `majorIvsPlusOffsets`.
+/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()`
+///    dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
+static Value
+emitInBoundsCondition(PatternRewriter &rewriter,
+                      VectorTransferOpInterface xferOp, unsigned leadingRank,
+                      ValueRange majorIvs, ValueRange majorOffsets,
+                      MemRefBoundsCapture &memrefBounds,
+                      SmallVectorImpl<Value> &majorIvsPlusOffsets) {
   Value inBoundsCondition;
   majorIvsPlusOffsets.reserve(majorIvs.size());
   unsigned idx = 0;
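As a rough sketch of the i1 condition the refactored emitInBoundsCondition builds for a 2-D access (hypothetical names, not compiler output): each indexing `index + iv` is compared against the corresponding memref bound and the results are combined with `and`.

```mlir
func @in_bounds_condition_sketch(%i0: index, %iv0: index, %d0: index,
                                 %i1: index, %iv1: index, %d1: index) -> i1 {
  // The indexings (majorIvs + majorOffsets) that are also handed back to the caller.
  %idx0 = addi %i0, %iv0 : index
  %idx1 = addi %i1, %iv1 : index
  // One upper-bound comparison per major dimension, folded together with and.
  %lt0 = cmpi "slt", %idx0, %d0 : index
  %lt1 = cmpi "slt", %idx1, %d1 : index
  %inb = and %lt0, %lt1 : i1
  return %inb : i1
}
```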
@@ -271,7 +267,8 @@ LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
     // context.
     SmallVector<Value, 4> majorIvsPlusOffsets;
     Value inBoundsCondition = emitInBoundsCondition(
-        majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
+        rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
+        leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
 
     if (inBoundsCondition) {
       // 2. If the condition is not null, we need an IfOp, which may yield
@@ -374,7 +371,8 @@ LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
     // context.
     SmallVector<Value, 4> majorIvsPlusOffsets;
     Value inBoundsCondition = emitInBoundsCondition(
-        majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
+        rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
+        leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
 
     if (inBoundsCondition) {
       // 2.a. If the condition is not null, we need an IfOp, to write
@@ -424,60 +422,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) {
   return coalescedIdx;
 }
 
-/// Emits remote memory accesses that are clipped to the boundaries of the
-/// MemRef.
-template <typename TransferOpTy>
-static SmallVector<Value, 8>
-clip(TransferOpTy transfer, MemRefBoundsCapture &bounds, ArrayRef<Value> ivs) {
-  using namespace mlir::edsc;
-
-  Value zero(std_constant_index(0)), one(std_constant_index(1));
-  SmallVector<Value, 8> memRefAccess(transfer.indices());
-  SmallVector<Value, 8> clippedScalarAccessExprs(memRefAccess.size());
-  // Indices accessing to remote memory are clipped and their expressions are
-  // returned in clippedScalarAccessExprs.
-  for (unsigned memRefDim = 0; memRefDim < clippedScalarAccessExprs.size();
-       ++memRefDim) {
-    // Linear search on a small number of entries.
-    int loopIndex = -1;
-    auto exprs = transfer.permutation_map().getResults();
-    for (auto en : llvm::enumerate(exprs)) {
-      auto expr = en.value();
-      auto dim = expr.template dyn_cast<AffineDimExpr>();
-      // Sanity check.
-      assert(
-          (dim || expr.template cast<AffineConstantExpr>().getValue() == 0) &&
-          "Expected dim or 0 in permutationMap");
-      if (dim && memRefDim == dim.getPosition()) {
-        loopIndex = en.index();
-        break;
-      }
-    }
-
-    // We cannot distinguish atm between unrolled dimensions that implement
-    // the "always full" tile abstraction and need clipping from the other
-    // ones. So we conservatively clip everything.
-    using namespace edsc::op;
-    auto N = bounds.ub(memRefDim);
-    auto i = memRefAccess[memRefDim];
-    if (loopIndex < 0) {
-      auto N_minus_1 = N - one;
-      auto select_1 = std_select(slt(i, N), i, N_minus_1);
-      clippedScalarAccessExprs[memRefDim] =
-          std_select(slt(i, zero), zero, select_1);
-    } else {
-      auto ii = ivs[loopIndex];
-      auto i_plus_ii = i + ii;
-      auto N_minus_1 = N - one;
-      auto select_1 = std_select(slt(i_plus_ii, N), i_plus_ii, N_minus_1);
-      clippedScalarAccessExprs[memRefDim] =
-          std_select(slt(i_plus_ii, zero), zero, select_1);
-    }
-  }
-
-  return clippedScalarAccessExprs;
-}
-
 namespace mlir {
 
 template <typename TransferOpTy>
@@ -497,6 +441,60 @@ MemRefType VectorTransferRewriter<TransferOpTy>::tmpMemRefType(
                          {}, 0);
 }
 
+static void emitWithBoundsChecks(
+    PatternRewriter &rewriter, VectorTransferOpInterface transfer,
+    ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture,
+    function_ref<void(ArrayRef<Value>)> inBoundsFun,
+    function_ref<void(ArrayRef<Value>)> outOfBoundsFun = nullptr) {
+  // Permute the incoming indices according to the permutation map.
+  SmallVector<Value, 4> indices =
+      linalg::applyMapToValues(rewriter, transfer.getLoc(),
+                               transfer.permutation_map(), transfer.indices());
+
+  // Generate a bounds check if necessary.
+  SmallVector<Value, 4> majorIvsPlusOffsets;
+  Value inBoundsCondition =
+      emitInBoundsCondition(rewriter, transfer, 0, ivs, indices,
+                            memRefBoundsCapture, majorIvsPlusOffsets);
+
+  // Apply the permutation map to the ivs. The permutation map may not use all
+  // the inputs.
+  SmallVector<Value, 4> scalarAccessExprs(transfer.indices().size());
+  for (unsigned memRefDim = 0; memRefDim < transfer.indices().size();
+       ++memRefDim) {
+    // Linear search on a small number of entries.
+    int loopIndex = -1;
+    auto exprs = transfer.permutation_map().getResults();
+    for (auto en : llvm::enumerate(exprs)) {
+      auto expr = en.value();
+      auto dim = expr.dyn_cast<AffineDimExpr>();
+      // Sanity check.
+      assert((dim || expr.cast<AffineConstantExpr>().getValue() == 0) &&
+             "Expected dim or 0 in permutationMap");
+      if (dim && memRefDim == dim.getPosition()) {
+        loopIndex = en.index();
+        break;
+      }
+    }
+
+    using namespace edsc::op;
+    auto i = transfer.indices()[memRefDim];
+    scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex];
+  }
+
+  if (inBoundsCondition)
+    conditionBuilder(
+        /* scf.if */ inBoundsCondition, // {
+        [&] { inBoundsFun(scalarAccessExprs); },
+        // } else {
+        outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); }
+                       : function_ref<void()>()
+        // }
+    );
+  else
+    inBoundsFun(scalarAccessExprs);
+}
+
 /// Lowers TransferReadOp into a combination of:
 ///   1. local memory allocation;
 ///   2. perfect loop nest over:
@@ -588,17 +586,25 @@ LogicalResult VectorTransferRewriter<TransferReadOp>::matchAndRewrite(
   Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer);
   StdIndexedValue local(tmp);
   loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
-    auto ivs = llvm::to_vector<8>(loopIvs);
+    auto ivsStorage = llvm::to_vector<8>(loopIvs);
     // Swap the ivs which will reorder memory accesses.
     if (coalescedIdx >= 0)
-      std::swap(ivs.back(), ivs[coalescedIdx]);
-    // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
-    SmallVector<Value, 8> indices = clip(transfer, memRefBoundsCapture, ivs);
-    ArrayRef<Value> indicesRef(indices), ivsRef(ivs);
-    Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back());
-    Value scal = remote(indicesRef);
-    Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos);
-    local(ivsRef.drop_back()) = vector;
+      std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
+
+    ArrayRef<Value> ivs(ivsStorage);
+    Value pos = std_index_cast(IntegerType::get(32, ctx), ivs.back());
+    Value inVector = local(ivs.drop_back());
+    auto loadValue = [&](ArrayRef<Value> indices) {
+      Value vector = vector_insert_element(remote(indices), inVector, pos);
+      local(ivs.drop_back()) = vector;
+    };
+    auto loadPadding = [&](ArrayRef<Value>) {
+      Value vector = vector_insert_element(transfer.padding(), inVector, pos);
+      local(ivs.drop_back()) = vector;
+    };
+    emitWithBoundsChecks(
+        rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
+        memRefBoundsCapture, loadValue, loadPadding);
   });
   Value vectorValue = std_load(vector_type_cast(tmp));
 
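The scalar loop this now produces for a read can be pictured roughly as follows (a hand-written 1-D sketch with invented names %A, %base, %pad, %tmp, not actual pass output): in bounds the element is loaded and inserted into the temporary vector, otherwise the padding value is inserted instead.

```mlir
func @read_loop_sketch(%A: memref<?xf32>, %base: index, %pad: f32,
                       %tmp: memref<vector<4xf32>>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c4 = constant 4 : index
  %d0 = dim %A, %c0 : memref<?xf32>
  scf.for %iv = %c0 to %c4 step %c1 {
    %idx = addi %base, %iv : index
    %inb = cmpi "slt", %idx, %d0 : index
    %pos = index_cast %iv : index to i32
    %old = load %tmp[] : memref<vector<4xf32>>
    scf.if %inb {
      // In bounds: read the element from memory.
      %s = load %A[%idx] : memref<?xf32>
      %new = vector.insertelement %s, %old[%pos : i32] : vector<4xf32>
      store %new, %tmp[] : memref<vector<4xf32>>
    } else {
      // Out of bounds: insert the padding value instead of touching memory.
      %new = vector.insertelement %pad, %old[%pos : i32] : vector<4xf32>
      store %new, %tmp[] : memref<vector<4xf32>>
    }
  }
  return
}
```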
@@ -674,17 +680,21 @@ LogicalResult VectorTransferRewriter<TransferWriteOp>::matchAndRewrite(
   Value vec = vector_type_cast(tmp);
   std_store(vectorValue, vec);
   loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
-    auto ivs = llvm::to_vector<8>(loopIvs);
-    // Swap the ivs which will reorder memory accesses.
+    auto ivsStorage = llvm::to_vector<8>(loopIvs);
+    // Swap the ivsStorage which will reorder memory accesses.
     if (coalescedIdx >= 0)
-      std::swap(ivs.back(), ivs[coalescedIdx]);
-    // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
-    SmallVector<Value, 8> indices = clip(transfer, memRefBoundsCapture, ivs);
-    ArrayRef<Value> indicesRef(indices), ivsRef(ivs);
+      std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
+
+    ArrayRef<Value> ivs(ivsStorage);
     Value pos =
-        std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back());
-    Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos);
-    remote(indices) = scalar;
+        std_index_cast(IntegerType::get(32, op->getContext()), ivs.back());
+    auto storeValue = [&](ArrayRef<Value> indices) {
+      Value scalar = vector_extract_element(local(ivs.drop_back()), pos);
+      remote(indices) = scalar;
+    };
+    emitWithBoundsChecks(
+        rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
+        memRefBoundsCapture, storeValue);
   });
 
   // 3. Erase.
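For the write side the helper is called without an out-of-bounds callback, so the scf.if has no else region and the store is simply skipped. A matching hand-written 1-D sketch (invented names, not actual pass output):

```mlir
func @write_loop_sketch(%A: memref<?xf32>, %base: index,
                        %tmp: memref<vector<4xf32>>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c4 = constant 4 : index
  %d0 = dim %A, %c0 : memref<?xf32>
  scf.for %iv = %c0 to %c4 step %c1 {
    %idx = addi %base, %iv : index
    %inb = cmpi "slt", %idx, %d0 : index
    scf.if %inb {
      // In bounds: extract the element and store it; otherwise do nothing.
      %v = load %tmp[] : memref<vector<4xf32>>
      %pos = index_cast %iv : index to i32
      %s = vector.extractelement %v[%pos : i32] : vector<4xf32>
      store %s, %A[%idx] : memref<?xf32>
    }
  }
  return
}
```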
@@ -15,11 +15,13 @@ func @materialize_read_1d() {
       %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1)
       %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
       // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds.
-      // CHECK: {{.*}} = select
-      // CHECK: %[[FILTERED1:.*]] = select
-      // CHECK: {{.*}} = select
-      // CHECK: %[[FILTERED2:.*]] = select
-      // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32>
+      // CHECK: scf.if
+      // CHECK-NEXT: load
+      // CHECK-NEXT: vector.insertelement
+      // CHECK-NEXT: store
+      // CHECK-NEXT: else
+      // CHECK-NEXT: vector.insertelement
+      // CHECK-NEXT: store
     }
   }
   return
@@ -53,7 +55,6 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
 // -----
 
 // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
 
 // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
@@ -72,37 +73,18 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
 // CHECK-NEXT:          scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
 // CHECK-NEXT:            scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
 // CHECK-NEXT:              scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[L0:.*]] = select
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[L1:.*]] = select
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[L2:.*]] = select
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[L3:.*]] = select
-// CHECK-NEXT:                %[[VIDX:.*]] = index_cast %[[I4]]
-//
-// CHECK-NEXT:                %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref<?x?x?x?xf32>
-// CHECK-NEXT:                %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
-// CHECK-NEXT:                %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
-// CHECK-NEXT:                store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+// CHECK:                     %[[VIDX:.*]] = index_cast %[[I4]]
+// CHECK:                     %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+// CHECK:                     %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
+// CHECK:                     %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
+// CHECK-NEXT:                scf.if
+// CHECK-NEXT:                  %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
+// CHECK-NEXT:                  %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
+// CHECK-NEXT:                  store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+// CHECK-NEXT:                } else {
+// CHECK-NEXT:                  %[[CVEC:.*]] = vector.insertelement
+// CHECK-NEXT:                  store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+// CHECK-NEXT:                }
 // CHECK-NEXT:              }
 // CHECK-NEXT:            }
 // CHECK-NEXT:          }
@@ -132,7 +114,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
 // -----
 
 // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
 
 // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
@@ -153,37 +134,15 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
 // CHECK-NEXT:          scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
 // CHECK-NEXT:            scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
 // CHECK-NEXT:              scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index
-// CHECK-NEXT:                {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index
-// CHECK-NEXT:                {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index
-// CHECK-NEXT:                %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-//
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
-// CHECK-NEXT:                {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}]
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
-// CHECK-NEXT:                {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
-// CHECK-NEXT:                %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
-// CHECK-NEXT:                %[[VIDX:.*]] = index_cast %[[I4]]
-//
-// CHECK-NEXT:                %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
-// CHECK-NEXT:                %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
-// CHECK-NEXT:                store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref<?x?x?x?xf32>
+// CHECK:                     %[[VIDX:.*]] = index_cast %[[I4]]
+// CHECK:                     %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
+// CHECK:                     %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
+// CHECK:                     %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
+// CHECK-NEXT:                scf.if
+// CHECK-NEXT:                  %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+// CHECK-NEXT:                  %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
+// CHECK:                       store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
+// CHECK-NEXT:                }
 // CHECK-NEXT:              }
 // CHECK-NEXT:            }
 // CHECK-NEXT:          }