[mlir][MemRef] Compute unused dimensions of a rank-reducing subview using strides as well.

For `memref.subview` operations, when there is more than one unit
dimension, the strides need to be used to figure out which of the
unit dims are actually dropped.
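
As an illustration, the following is a minimal standalone sketch (plain C++
with made-up sizes and strides and a hypothetical helper; not the MLIR
implementation) of why the sizes alone are ambiguous and the strides resolve
the ambiguity:

#include <cassert>
#include <cstdint>
#include <vector>

// Returns true if `reducedStrides` equals `origStrides` with exactly the
// entries at `droppedDims` removed.
static bool stridesMatchAfterDropping(const std::vector<int64_t> &origStrides,
                                      const std::vector<int64_t> &reducedStrides,
                                      const std::vector<unsigned> &droppedDims) {
  std::vector<int64_t> kept;
  for (size_t d = 0, e = origStrides.size(); d < e; ++d) {
    bool dropped = false;
    for (unsigned dd : droppedDims)
      dropped |= (dd == d);
    if (!dropped)
      kept.push_back(origStrides[d]);
  }
  return kept == reducedStrides;
}

int main() {
  // Source view: sizes [1, 1, 8], strides [128, 16, 1]. Dims 0 and 1 are both
  // unit dims, so the sizes alone cannot tell which one a rank-2 result drops.
  std::vector<int64_t> origStrides = {128, 16, 1};
  // A result with strides [16, 1] must have dropped dim 0 (stride 128 is gone).
  assert(stridesMatchAfterDropping(origStrides, {16, 1}, {0}));
  // A result with strides [128, 1] must have dropped dim 1 (stride 16 is gone).
  assert(stridesMatchAfterDropping(origStrides, {128, 1}, {1}));
  return 0;
}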

Differential Revision: https://reviews.llvm.org/D109418
MaheshRavishankar 2021-09-20 11:04:15 -07:00
parent 1e45cd75df
commit 4cf9bf6c9f
10 changed files with 304 additions and 77 deletions


@@ -1379,6 +1379,10 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides<
/// Return the number of leading operands before the `offsets`, `sizes`
/// and `strides` operands.
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
/// Return the dimensions of the source type that are dropped when
/// the result is rank-reduced.
llvm::SmallDenseSet<unsigned> getDroppedDims();
}];
let hasCanonicalizer = 1;


@@ -66,7 +66,7 @@ def OffsetSizeAndStrideOpInterface : OpInterface<"OffsetSizeAndStrideOpInterface
let cppNamespace = "::mlir";
let methods = [
InterfaceMethod<
StaticInterfaceMethod<
/*desc=*/[{
Return the number of leading operands before the `offsets`, `sizes`
and `strides` operands.


@@ -1272,12 +1272,8 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern<memref::SubViewOp> {
extracted);
targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);
auto shape = viewMemRefType.getShape();
auto inferredShape = inferredType.getShape();
size_t inferredShapeRank = inferredShape.size();
size_t resultShapeRank = shape.size();
llvm::SmallDenseSet<unsigned> unusedDims =
computeRankReductionMask(inferredShape, shape).getValue();
size_t inferredShapeRank = inferredType.getRank();
size_t resultShapeRank = viewMemRefType.getRank();
// Extract strides needed to compute offset.
SmallVector<Value, 4> strideValues;
@@ -1315,6 +1311,7 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern<memref::SubViewOp> {
SmallVector<OpFoldResult> mixedStrides = subViewOp.getMixedStrides();
assert(mixedSizes.size() == mixedStrides.size() &&
"expected sizes and strides of equal length");
llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
for (int i = inferredShapeRank - 1, j = resultShapeRank - 1;
i >= 0 && j >= 0; --i) {
if (unusedDims.contains(i))


@@ -690,6 +690,92 @@ static LogicalResult verify(DimOp op) {
return success();
}
/// Return a map with key being elements in `vals` and data being the number
/// of occurrences of each element. Use std::map, since the `vals` here are
/// strides and the dynamic stride value is the same as the tombstone value
/// for `DenseMap<int64_t>`.
static std::map<int64_t, unsigned> getNumOccurences(ArrayRef<int64_t> vals) {
std::map<int64_t, unsigned> numOccurences;
for (auto val : vals)
numOccurences[val]++;
return numOccurences;
}
/// Given the un-rank-reduced subview result type and the rank-reduced result
/// type, computes the dropped dimensions. This accounts for cases where there
/// are multiple unit-dims, but only a subset of those are dropped. For
/// MemRefTypes these can be disambiguated using the strides: if a dimension
/// is dropped, its stride must be dropped too.
static llvm::Optional<llvm::SmallDenseSet<unsigned>>
computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType,
ArrayAttr staticSizes) {
llvm::SmallDenseSet<unsigned> unusedDims;
if (originalType.getRank() == reducedType.getRank())
return unusedDims;
for (auto dim : llvm::enumerate(staticSizes))
if (dim.value().cast<IntegerAttr>().getInt() == 1)
unusedDims.insert(dim.index());
SmallVector<int64_t> originalStrides, candidateStrides;
int64_t originalOffset, candidateOffset;
if (failed(
getStridesAndOffset(originalType, originalStrides, originalOffset)) ||
failed(
getStridesAndOffset(reducedType, candidateStrides, candidateOffset)))
return llvm::None;
// For memrefs, a dimension is truly dropped if its corresponding stride is
// also dropped. This is particularly important when more than one of the dims
// is 1. Track the number of occurrences of the strides in the original type
// and the candidate type. For each unused dim, that stride should not be
// present in the candidate type. Note that there could be multiple dimensions
// that have the same size. We don't need to figure out exactly which dim
// corresponds to which stride; we just need to verify that the number of
// repetitions of a stride in the original + number of unused dims with that
// stride == number of repetitions of that stride in the candidate.
std::map<int64_t, unsigned> currUnaccountedStrides =
getNumOccurences(originalStrides);
std::map<int64_t, unsigned> candidateStridesNumOccurences =
getNumOccurences(candidateStrides);
llvm::SmallDenseSet<unsigned> prunedUnusedDims;
for (unsigned dim : unusedDims) {
int64_t originalStride = originalStrides[dim];
if (currUnaccountedStrides[originalStride] >
candidateStridesNumOccurences[originalStride]) {
// This dim can be treated as dropped.
currUnaccountedStrides[originalStride]--;
continue;
}
if (currUnaccountedStrides[originalStride] ==
candidateStridesNumOccurences[originalStride]) {
// The stride for this is not dropped. Keep as is.
prunedUnusedDims.insert(dim);
continue;
}
if (currUnaccountedStrides[originalStride] <
candidateStridesNumOccurences[originalStride]) {
// This should never happen. Can't have a stride in the reduced rank type
// that wasn't in the original one.
return llvm::None;
}
}
for (auto prunedDim : prunedUnusedDims)
unusedDims.erase(prunedDim);
if (unusedDims.size() + reducedType.getRank() != originalType.getRank())
return llvm::None;
return unusedDims;
}
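// As a worked trace of the occurrence-counting scheme described in the
// comments above, the following standalone sketch uses plain containers and
// made-up strides (an illustrative sketch only; not part of this change):
#include <cassert>
#include <cstdint>
#include <map>
#include <set>
#include <vector>

int main() {
  // Original view: sizes [1, 1, 8], strides [128, 16, 1]; unit dims = {0, 1}.
  // Candidate reduced view: sizes [1, 8], strides [128, 1].
  std::vector<int64_t> originalStrides = {128, 16, 1};
  std::vector<int64_t> candidateStrides = {128, 1};
  std::set<unsigned> unitDims = {0, 1};

  std::map<int64_t, unsigned> origCount, candCount;
  for (int64_t s : originalStrides)
    ++origCount[s];
  for (int64_t s : candidateStrides)
    ++candCount[s];

  std::set<unsigned> dropped;
  for (unsigned dim : unitDims) {
    int64_t stride = originalStrides[dim];
    // A stride that is over-represented in the original relative to the
    // candidate accounts for a dropped unit dim; otherwise the dim is kept.
    if (origCount[stride] > candCount[stride]) {
      --origCount[stride];
      dropped.insert(dim);
    }
  }
  // Stride 16 disappeared while stride 128 survived, so only dim 1 is dropped.
  assert(dropped == std::set<unsigned>({1}));
  return 0;
}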
llvm::SmallDenseSet<unsigned> SubViewOp::getDroppedDims() {
MemRefType sourceType = getSourceType();
MemRefType resultType = getType();
llvm::Optional<llvm::SmallDenseSet<unsigned>> unusedDims =
computeMemRefRankReductionMask(sourceType, resultType, static_sizes());
assert(unusedDims && "unable to find unused dims of subview");
return *unusedDims;
}
OpFoldResult DimOp::fold(ArrayRef<Attribute> operands) {
// All forms of folding require a known index.
auto index = operands[1].dyn_cast_or_null<IntegerAttr>();
@@ -725,6 +811,25 @@ OpFoldResult DimOp::fold(ArrayRef<Attribute> operands) {
return *(view.getDynamicSizes().begin() +
memrefType.getDynamicDimIndex(unsignedIndex));
if (auto subview = dyn_cast_or_null<SubViewOp>(definingOp)) {
llvm::SmallDenseSet<unsigned> unusedDims = subview.getDroppedDims();
unsigned resultIndex = 0;
unsigned sourceRank = subview.getSourceType().getRank();
unsigned sourceIndex = 0;
for (auto i : llvm::seq<unsigned>(0, sourceRank)) {
if (unusedDims.count(i))
continue;
if (resultIndex == unsignedIndex) {
sourceIndex = i;
break;
}
resultIndex++;
}
assert(subview.isDynamicSize(sourceIndex) &&
"expected dynamic subview size");
return subview.getDynamicSize(sourceIndex);
}
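// The remapping above walks the source dims, skipping dropped ones, until the
// requested result dim is reached. A minimal standalone sketch of that
// behavior (plain C++ with a hypothetical helper; not part of this change):
#include <cassert>
#include <set>

static unsigned mapResultDimToSourceDim(const std::set<unsigned> &droppedDims,
                                        unsigned sourceRank,
                                        unsigned resultDim) {
  unsigned resultIndex = 0;
  for (unsigned i = 0; i < sourceRank; ++i) {
    if (droppedDims.count(i))
      continue;
    if (resultIndex == resultDim)
      return i;
    ++resultIndex;
  }
  return sourceRank; // Out of range; callers are expected not to hit this.
}

int main() {
  // Source rank 3 with dim 0 dropped: result dims 0 and 1 correspond to
  // source dims 1 and 2 respectively.
  std::set<unsigned> dropped = {0};
  assert(mapResultDimToSourceDim(dropped, 3, 0) == 1);
  assert(mapResultDimToSourceDim(dropped, 3, 1) == 2);
  return 0;
}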
if (auto sizeInterface =
dyn_cast_or_null<OffsetSizeAndStrideOpInterface>(definingOp)) {
assert(sizeInterface.isDynamicSize(unsignedIndex) &&
@@ -1887,7 +1992,7 @@ enum SubViewVerificationResult {
/// not matching dimension must be 1.
static SubViewVerificationResult
isRankReducedType(Type originalType, Type candidateReducedType,
std::string *errMsg = nullptr) {
ArrayAttr staticSizes, std::string *errMsg = nullptr) {
if (originalType == candidateReducedType)
return SubViewVerificationResult::Success;
if (!originalType.isa<MemRefType>())
@@ -1908,8 +2013,11 @@ isRankReducedType(Type originalType, Type candidateReducedType,
if (candidateReducedRank > originalRank)
return SubViewVerificationResult::RankTooLarge;
MemRefType original = originalType.cast<MemRefType>();
MemRefType candidateReduced = candidateReducedType.cast<MemRefType>();
auto optionalUnusedDimsMask =
computeRankReductionMask(originalShape, candidateReducedShape);
computeMemRefRankReductionMask(original, candidateReduced, staticSizes);
// Sizes cannot be matched if no rank-reduction mask could be computed.
if (!optionalUnusedDimsMask.hasValue())
@@ -1920,42 +2028,8 @@ isRankReducedType(Type originalType, Type candidateReducedType,
return SubViewVerificationResult::ElemTypeMismatch;
// Strided layout logic is relevant for MemRefType only.
MemRefType original = originalType.cast<MemRefType>();
MemRefType candidateReduced = candidateReducedType.cast<MemRefType>();
if (original.getMemorySpace() != candidateReduced.getMemorySpace())
return SubViewVerificationResult::MemSpaceMismatch;
llvm::SmallDenseSet<unsigned> unusedDims = optionalUnusedDimsMask.getValue();
auto inferredType =
getProjectedMap(getStridedLinearLayoutMap(original), unusedDims);
AffineMap candidateLayout;
if (candidateReduced.getAffineMaps().empty())
candidateLayout = getStridedLinearLayoutMap(candidateReduced);
else
candidateLayout = candidateReduced.getAffineMaps().front();
assert(inferredType.getNumResults() == 1 &&
candidateLayout.getNumResults() == 1);
if (inferredType.getNumSymbols() != candidateLayout.getNumSymbols() ||
inferredType.getNumDims() != candidateLayout.getNumDims()) {
if (errMsg) {
llvm::raw_string_ostream os(*errMsg);
os << "inferred type: " << inferredType;
}
return SubViewVerificationResult::AffineMapMismatch;
}
// Check that the difference of the affine maps simplifies to 0.
AffineExpr diffExpr =
inferredType.getResult(0) - candidateLayout.getResult(0);
diffExpr = simplifyAffineExpr(diffExpr, inferredType.getNumDims(),
inferredType.getNumSymbols());
auto cst = diffExpr.dyn_cast<AffineConstantExpr>();
if (!(cst && cst.getValue() == 0)) {
if (errMsg) {
llvm::raw_string_ostream os(*errMsg);
os << "inferred type: " << inferredType;
}
return SubViewVerificationResult::AffineMapMismatch;
}
return SubViewVerificationResult::Success;
}
@@ -2012,7 +2086,8 @@ static LogicalResult verify(SubViewOp op) {
extractFromI64ArrayAttr(op.static_strides()));
std::string errMsg;
auto result = isRankReducedType(expectedType, subViewType, &errMsg);
auto result =
isRankReducedType(expectedType, subViewType, op.static_sizes(), &errMsg);
return produceSubViewErrorMsg(result, op, expectedType, errMsg);
}


@@ -49,18 +49,13 @@ resolveSourceIndices(Location loc, PatternRewriter &rewriter,
SmallVector<Value> useIndices;
// Check if this is a rank-reducing case. Then for every dropped dimension add
// a zero to the indices.
ArrayRef<int64_t> resultShape = subViewOp.getType().getShape();
unsigned resultDim = 0;
for (auto size : llvm::enumerate(mixedSizes)) {
auto attr = size.value().dyn_cast<Attribute>();
// Check if this dimension has been dropped, i.e. the size is 1, but the
// associated dimension is not 1.
if (attr && attr.cast<IntegerAttr>().getInt() == 1 &&
(resultDim >= resultShape.size() || resultShape[resultDim] != 1))
llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
for (auto dim : llvm::seq<unsigned>(0, subViewOp.getSourceType().getRank())) {
if (unusedDims.count(dim))
useIndices.push_back(rewriter.create<ConstantIndexOp>(loc, 0));
else if (resultDim < resultShape.size()) {
else
useIndices.push_back(indices[resultDim++]);
}
}
if (useIndices.size() != mixedOffsets.size())
return failure();
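// The loop above rebuilds full source indices from rank-reduced indices:
// dropped dims get a constant zero and the remaining dims consume the incoming
// indices in order. A minimal standalone sketch (plain values and a
// hypothetical helper; not part of this change):
#include <cassert>
#include <set>
#include <vector>

static std::vector<int> expandIndices(const std::set<unsigned> &droppedDims,
                                      unsigned sourceRank,
                                      const std::vector<int> &reducedIndices) {
  std::vector<int> useIndices;
  unsigned resultDim = 0;
  for (unsigned dim = 0; dim < sourceRank; ++dim) {
    if (droppedDims.count(dim))
      useIndices.push_back(0);
    else
      useIndices.push_back(reducedIndices[resultDim++]);
  }
  return useIndices;
}

int main() {
  // Source rank 3, dim 0 dropped: rank-reduced access indices [7, 9] expand
  // to source indices [0, 7, 9].
  assert(expandIndices({0}, 3, {7, 9}) == std::vector<int>({0, 7, 9}));
  return 0;
}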
@@ -104,6 +99,25 @@ static Value getMemRefOperand(vector::TransferWriteOp op) {
return op.source();
}
/// Given the permutation map of the original
/// `vector.transfer_read`/`vector.transfer_write` operations, compute the
/// permutation map to use after the subview is folded with it.
static AffineMap getPermutationMap(MLIRContext *context,
memref::SubViewOp subViewOp,
AffineMap currPermutationMap) {
llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
SmallVector<AffineExpr> exprs;
unsigned resultIdx = 0;
int64_t sourceRank = subViewOp.getSourceType().getRank();
for (auto dim : llvm::seq<int64_t>(0, sourceRank)) {
if (unusedDims.count(dim))
continue;
exprs.push_back(getAffineDimExpr(resultIdx++, context));
}
auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context);
return currPermutationMap.compose(resultDimToSourceDimMap);
}
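// To see what the composition above produces, the following standalone sketch
// models a pure dim-projection map by the input dim each result reads (a
// simplification made for illustration; not MLIR's AffineMap) and reproduces
// the permutation map checked as #[[MAP2]] in the tests below:
#include <cassert>
#include <vector>

struct DimProjectionMap {
  unsigned numDims;              // Number of input dims.
  std::vector<unsigned> results; // results[i] = input dim read by result i.
};

// (f.compose(g))(x) = f(g(x)): substitute g's results into f's results.
static DimProjectionMap compose(const DimProjectionMap &f,
                                const DimProjectionMap &g) {
  DimProjectionMap out;
  out.numDims = g.numDims;
  for (unsigned r : f.results)
    out.results.push_back(g.results[r]);
  return out;
}

int main() {
  // The transfer op on the rank-reduced (2-D) subview reads along its last
  // dim: currPermutationMap = (d0, d1) -> (d1).
  DimProjectionMap currPermutationMap{2, {1}};
  // Source rank 3 with dim 0 dropped: the map built above has one result per
  // kept source dim, i.e. (d0, d1, d2) -> (d0, d1).
  DimProjectionMap resultDimToSourceDimMap{3, {0, 1}};
  DimProjectionMap folded = compose(currPermutationMap, resultDimToSourceDimMap);
  // The folded map is (d0, d1, d2) -> (d1).
  assert(folded.numDims == 3 && folded.results == std::vector<unsigned>({1}));
  return 0;
}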
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -153,7 +167,9 @@ void LoadOpOfSubViewFolder<vector::TransferReadOp>::replaceOp(
ArrayRef<Value> sourceIndices, PatternRewriter &rewriter) const {
rewriter.replaceOpWithNewOp<vector::TransferReadOp>(
loadOp, loadOp.getVectorType(), subViewOp.source(), sourceIndices,
loadOp.permutation_map(), loadOp.padding(), loadOp.in_boundsAttr());
getPermutationMap(rewriter.getContext(), subViewOp,
loadOp.permutation_map()),
loadOp.padding(), loadOp.in_boundsAttr());
}
template <>
@@ -170,7 +186,9 @@ void StoreOpOfSubViewFolder<vector::TransferWriteOp>::replaceOp(
ArrayRef<Value> sourceIndices, PatternRewriter &rewriter) const {
rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
transferWriteOp, transferWriteOp.vector(), subViewOp.source(),
sourceIndices, transferWriteOp.permutation_map(),
sourceIndices,
getPermutationMap(rewriter.getContext(), subViewOp,
transferWriteOp.permutation_map()),
transferWriteOp.in_boundsAttr());
}
} // namespace


@@ -1418,3 +1418,28 @@ func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %o
// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
// -----
func @lower_to_loops_with_rank_reducing_subviews(
%arg0 : memref<?xi32>, %arg1 : memref<?x?xi32>, %arg2 : index,
%arg3 : index, %arg4 : index) {
%0 = memref.subview %arg0[%arg2] [%arg3] [1]
: memref<?xi32> to memref<?xi32, offset: ?, strides: [1]>
%1 = memref.subview %arg1[0, %arg4] [1, %arg3] [1, 1]
: memref<?x?xi32> to memref<?xi32, offset: ?, strides : [1]>
linalg.copy(%0, %1)
: memref<?xi32, offset: ?, strides: [1]>, memref<?xi32, offset: ?, strides: [1]>
return
}
// CHECK-LABEL: func @lower_to_loops_with_rank_reducing_subviews
// CHECK: scf.for %[[IV:.+]] = %{{.+}} to %{{.+}} step %{{.+}} {
// CHECK: %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]]
// CHECK: memref.store %[[VAL]], %{{.+}}[%[[IV]]]
// CHECK: }
// CHECKPARALLEL-LABEL: func @lower_to_loops_with_rank_reducing_subviews
// CHECKPARALLEL: scf.parallel (%[[IV:.+]]) = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
// CHECKPARALLEL: %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]]
// CHECKPARALLEL: memref.store %[[VAL]], %{{.+}}[%[[IV]]]
// CHECKPARALLEL: }


@@ -159,6 +159,63 @@ func @rank_reducing_subview_canonicalize(%arg0 : memref<?x?x?xf32>, %arg1 : inde
// CHECK: %[[RESULT:.+]] = memref.cast %[[SUBVIEW]]
// CHECK: return %[[RESULT]]
// -----
func @multiple_reducing_dims(%arg0 : memref<1x384x384xf32>,
%arg1 : index, %arg2 : index, %arg3 : index) -> memref<?xf32, offset: ?, strides: [1]>
{
%c1 = constant 1 : index
%0 = memref.subview %arg0[0, %arg1, %arg2] [1, %c1, %arg3] [1, 1, 1] : memref<1x384x384xf32> to memref<?x?xf32, offset: ?, strides: [384, 1]>
%1 = memref.subview %0[0, 0] [1, %arg3] [1, 1] : memref<?x?xf32, offset: ?, strides: [384, 1]> to memref<?xf32, offset: ?, strides: [1]>
return %1 : memref<?xf32, offset: ?, strides: [1]>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0] -> (d0 * 384 + s0 + d1)>
// CHECK: func @multiple_reducing_dims
// CHECK: %[[REDUCED1:.+]] = memref.subview %{{.+}}[0, %{{.+}}, %{{.+}}] [1, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : memref<1x384x384xf32> to memref<1x?xf32, #[[MAP1]]>
// CHECK: %[[REDUCED2:.+]] = memref.subview %[[REDUCED1]][0, 0] [1, %{{.+}}] [1, 1]
// CHECK-SAME: : memref<1x?xf32, #[[MAP1]]> to memref<?xf32, #[[MAP0]]>
// -----
func @multiple_reducing_dims_dynamic(%arg0 : memref<?x?x?xf32>,
%arg1 : index, %arg2 : index, %arg3 : index) -> memref<?xf32, offset: ?, strides: [1]>
{
%c1 = constant 1 : index
%0 = memref.subview %arg0[0, %arg1, %arg2] [1, %c1, %arg3] [1, 1, 1] : memref<?x?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, 1]>
%1 = memref.subview %0[0, 0] [1, %arg3] [1, 1] : memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?xf32, offset: ?, strides: [1]>
return %1 : memref<?xf32, offset: ?, strides: [1]>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK: func @multiple_reducing_dims_dynamic
// CHECK: %[[REDUCED1:.+]] = memref.subview %{{.+}}[0, %{{.+}}, %{{.+}}] [1, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : memref<?x?x?xf32> to memref<1x?xf32, #[[MAP1]]>
// CHECK: %[[REDUCED2:.+]] = memref.subview %[[REDUCED1]][0, 0] [1, %{{.+}}] [1, 1]
// CHECK-SAME: : memref<1x?xf32, #[[MAP1]]> to memref<?xf32, #[[MAP0]]>
// -----
func @multiple_reducing_dims_all_dynamic(%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>,
%arg1 : index, %arg2 : index, %arg3 : index) -> memref<?xf32, offset: ?, strides: [?]>
{
%c1 = constant 1 : index
%0 = memref.subview %arg0[0, %arg1, %arg2] [1, %c1, %arg3] [1, 1, 1]
: memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
%1 = memref.subview %0[0, 0] [1, %arg3] [1, 1] : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<?xf32, offset: ?, strides: [?]>
return %1 : memref<?xf32, offset: ?, strides: [?]>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK: func @multiple_reducing_dims_all_dynamic
// CHECK: %[[REDUCED1:.+]] = memref.subview %{{.+}}[0, %{{.+}}, %{{.+}}] [1, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : memref<?x?x?xf32, #[[MAP2]]> to memref<1x?xf32, #[[MAP1]]>
// CHECK: %[[REDUCED2:.+]] = memref.subview %[[REDUCED1]][0, 0] [1, %{{.+}}] [1, 1]
// CHECK-SAME: : memref<1x?xf32, #[[MAP1]]> to memref<?xf32, #[[MAP0]]>
// -----
// CHECK-LABEL: @clone_before_dealloc
@@ -567,4 +624,3 @@ func @collapse_after_memref_cast(%arg0 : memref<?x512x1x?xf32>) -> memref<?x?xf3
%collapsed = memref.collapse_shape %dynamic [[0], [1, 2, 3]] : memref<?x?x?x?xf32> into memref<?x?xf32>
return %collapsed : memref<?x?xf32>
}


@@ -160,3 +160,66 @@ func @fold_rank_reducing_subview_with_load
// CHECK-DAG: %[[I5:.+]] = affine.apply #[[MAP]](%[[ARG16]])[%[[ARG11]], %[[ARG5]]]
// CHECK-DAG: %[[I6:.+]] = affine.apply #[[MAP]](%[[C0]])[%[[ARG12]], %[[ARG6]]]
// CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]], %[[I3]], %[[I4]], %[[I5]], %[[I6]]]
// -----
func @fold_vector_transfer_read_with_rank_reduced_subview(
%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>,
%arg1: index, %arg2 : index, %arg3 : index, %arg4: index, %arg5 : index,
%arg6 : index) -> vector<4xf32> {
%cst = constant 0.0 : f32
%0 = memref.subview %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1]
: memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]> to
memref<?x?xf32, offset: ?, strides: [?, ?]>
%1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]}
: memref<?x?xf32, offset: ?, strides: [?, ?]>, vector<4xf32>
return %1 : vector<4xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
// CHECK: func @fold_vector_transfer_read_with_rank_reduced_subview
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG5]])[%[[ARG1]]]
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
// CHECK-SAME: permutation_map = #[[MAP2]]
// -----
func @fold_vector_transfer_write_with_rank_reduced_subview(
%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>,
%arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index,
%arg5: index, %arg6 : index, %arg7 : index) {
%cst = constant 0.0 : f32
%0 = memref.subview %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1]
: memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]> to
memref<?x?xf32, offset: ?, strides: [?, ?]>
vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]}
: vector<4xf32>, memref<?x?xf32, offset: ?, strides: [?, ?]>
return
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
// CHECK: func @fold_vector_transfer_write_with_rank_reduced_subview
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]]
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
// CHECK-SAME: permutation_map = #[[MAP2]]


@@ -353,3 +353,12 @@ func @collapse_shape_illegal_mixed_memref_2(%arg0 : memref<?x4x5xf32>)
: memref<?x4x5xf32> into memref<?x?xf32>
return %0 : memref<?x?xf32>
}
// -----
func @static_stride_to_dynamic_stride(%arg0 : memref<?x?x?xf32>, %arg1 : index,
%arg2 : index) -> memref<?x?xf32, offset:?, strides: [?, ?]> {
// expected-error @+1 {{expected result type to be 'memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>>' or a rank-reduced version. (mismatch of result sizes)}}
%0 = memref.subview %arg0[0, 0, 0] [1, %arg1, %arg2] [1, 1, 1] : memref<?x?x?xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
return %0 : memref<?x?xf32, offset: ?, strides: [?, ?]>
}


@@ -960,17 +960,6 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
%0 = memref.alloc() : memref<8x16x4xf32>
// expected-error@+1 {{expected result type to be 'memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>' or a rank-reduced version. (mismatch of result affine map)}}
%1 = memref.subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, %arg1, %arg2]
: memref<8x16x4xf32> to
memref<?x?x?xf32, offset: ?, strides: [64, 4, 1]>
return
}
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
%0 = memref.alloc() : memref<8x16x4xf32>
// expected-error@+1 {{expected result element type to be 'f32'}}
@@ -1014,22 +1003,13 @@ func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index)
// -----
func @invalid_rank_reducing_subview(%arg0 : memref<?x?xf32>, %arg1 : index, %arg2 : index) {
// expected-error@+1 {{expected result type to be 'memref<?x1xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result affine map)}}
// expected-error@+1 {{expected result type to be 'memref<?x1xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result sizes)}}
%0 = memref.subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref<?x?xf32> to memref<?xf32>
return
}
// -----
// The affine map affine_map<(d0)[s0, s1, s2] -> (d0 * s1 + s0)> has an extra unused symbol.
func @invalid_rank_reducing_subview(%arg0 : memref<?x?xf32>, %arg1 : index, %arg2 : index) {
// expected-error@+1 {{expected result type to be 'memref<?x1xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result affine map) inferred type: (d0)[s0, s1] -> (d0 * s1 + s0)}}
%0 = memref.subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref<?x?xf32> to memref<?xf32, affine_map<(d0)[s0, s1, s2] -> (d0 * s1 + s0)>>
return
}
// -----
func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) {
// expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}}
%0 = memref.cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]>