From 03391df90ed142aafc0669501071cd30218adc0e Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Mon, 13 Apr 2020 09:33:34 -0700 Subject: [PATCH] [mlir][Linalg] Add loop.parallel lowering for all Linalg Ops. The outer parallel loops of a linalg operation are lowered to loop.parallel, with the other loops lowered to loop.for. This gets the lowering to loop.parallel on par with the loop.for lowering. In the future, the reduction loop could also be lowered to loop.parallel. Also add a utility function that returns the loops that are created. Differential Revision: https://reviews.llvm.org/D77678 --- .../Linalg/Transforms/LinalgTransforms.h | 7 + .../Linalg/Transforms/LinalgToLoops.cpp | 228 +++-- mlir/test/Dialect/Linalg/loops.mlir | 890 ++++++++++++------ mlir/test/Dialect/Linalg/parallel_loops.mlir | 24 +- 4 files changed, 780 insertions(+), 369 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h index 4a87403938ed..3bff0f1bbf6e 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h @@ -70,6 +70,13 @@ LogicalResult tileAndFuseLinalgOpAndSetMarker( PatternRewriter &rewriter, Operation *op, ArrayRef sizes, ArrayRef operandIndicesToFuse, StringRef linalgMarker); +using LinalgLoops = SmallVector; + +/// Emits a loop nest with the proper body for `op`. +template +Optional linalgLowerOpToLoops(PatternRewriter &rewriter, + Operation *op); + /// Emits a loop nest of `loop.for` with the proper body for `op`. 
template LogicalResult linalgOpToLoops(PatternRewriter &rewriter, Operation *op); diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp index 48df0ac3ea2a..9717bb874345 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp @@ -533,26 +533,111 @@ public: // consequence, (1) it is only allowed to emit new ops if the match is // guaranteed to be a success, (2) it is not allowed erase/replace, and (3) an // encompassing pattern must take care of the erasure logic. -template +template class LinalgOpToLoopsImpl { public: - static LogicalResult doit(Operation *op, PatternRewriter &rewriter); + static Optional doit(Operation *op, PatternRewriter &rewriter); }; -template -bool loweringIsAllowed(int numParallelLoops, int numLoops) { - return true; -} -template <> -bool loweringIsAllowed(int numParallelLoops, int numLoops) { - return numParallelLoops == numLoops; -} +namespace { +/// Helper struct to generate the loop nest for the op. This is factored out +/// here to be able to partially specialize this for different LoopTy. +template +class GenerateLoopNest { +public: + using IndexedValueTy = + typename std::conditional::value, + AffineIndexedValue, StdIndexedValue>::type; + static void doit(ConcreteOpTy linalgOp, ArrayRef loopRanges, + MutableArrayRef allIvs) { + SmallVector allPIvs = + makeHandlePointers(MutableArrayRef(allIvs)); -template -LogicalResult LinalgOpToLoopsImpl::doit( - Operation *op, PatternRewriter &rewriter) { - OpBuilder b(op); - ScopedContext scope(b, op->getLoc()); + GenericLoopNestRangeBuilder(allPIvs, loopRanges)([&] { + SmallVector allIvValues(allIvs.begin(), allIvs.end()); + LinalgScopedEmitter::emitScalarImplementation(allIvValues, + linalgOp); + }); + } +}; + +/// Generates loop nests using loop.parallel. loop.parallel is only used for the +/// outer parallel loops. 
All other loops are generated using loop.for +/// operation. +template +class GenerateLoopNest { +public: + using IndexedValueTy = StdIndexedValue; + + static void doit(ConcreteOpTy linalgOp, ArrayRef loopRanges, + MutableArrayRef allIvs) { + // Only generate loop.parallel for outer consecutive "parallel" + // iterator_types. + // TODO(ravishankarm): Generate loop.parallel for all "parallel" iterator + // types, not just the outer most ones. Also handle "reduction" iterator + // types. + auto nPar = linalgOp.getNumParallelLoops(); + auto nRed = linalgOp.getNumReductionLoops(); + auto nWin = linalgOp.getNumWindowLoops(); + auto nLoops = nPar + nRed + nWin; + auto nOuterPar = linalgOp.iterator_types() + .getValue() + .take_while([](Attribute attr) { + return attr.cast().getValue() == + getParallelIteratorTypeName(); + }) + .size(); + // If there are no outer parallel loops, then number of loop ops is same as + // the number of loops, and they are all loop.for ops. + auto nLoopOps = (nOuterPar ? nLoops - nOuterPar + 1 : nLoops); + SmallVector allPIvs = + makeHandlePointers(MutableArrayRef(allIvs)); + + SmallVector allLoops(nLoopOps, OperationHandle()); + SmallVector allPLoops; + allPLoops.reserve(allLoops.size()); + for (OperationHandle &loop : allLoops) + allPLoops.push_back(&loop); + + ArrayRef allPIvsRef(allPIvs); + ArrayRef allPLoopsRef(allPLoops); + + if (nOuterPar) { + GenericLoopNestRangeBuilder( + allPIvsRef.take_front(nOuterPar), + loopRanges.take_front(nOuterPar))([&] { + GenericLoopNestRangeBuilder( + allPIvsRef.drop_front(nOuterPar), + loopRanges.drop_front(nOuterPar))([&] { + SmallVector allIvValues(allIvs.begin(), allIvs.end()); + LinalgScopedEmitter:: + emitScalarImplementation(allIvValues, linalgOp); + }); + }); + } else { + // If there are no parallel loops then fallback to generating all loop.for + // operations. 
+ GenericLoopNestRangeBuilder(allPIvsRef, loopRanges)([&] { + SmallVector allIvValues(allIvs.begin(), allIvs.end()); + LinalgScopedEmitter::emitScalarImplementation(allIvValues, + linalgOp); + }); + } + } +}; +} // namespace + +template +Optional +LinalgOpToLoopsImpl::doit(Operation *op, + PatternRewriter &rewriter) { + using Impl = GenerateLoopNest; + using IndexedValueTy = + typename GenerateLoopNest::IndexedValueTy; + + ScopedContext scope(rewriter, op->getLoc()); // The flattened loopToOperandRangesMaps is expected to be an invertible // permutation map (which is asserted in the inverse calculation). @@ -563,8 +648,6 @@ LogicalResult LinalgOpToLoopsImpl::doit( auto nRed = linalgOp.getNumReductionLoops(); auto nWin = linalgOp.getNumWindowLoops(); auto nLoops = nPar + nRed + nWin; - if (!loweringIsAllowed(nPar, nLoops)) - return failure(); auto mapsRange = linalgOp.indexing_maps().template getAsRange(); auto maps = @@ -573,25 +656,34 @@ LogicalResult LinalgOpToLoopsImpl::doit( if (!invertedMap) { LinalgScopedEmitter::emitScalarImplementation( {}, linalgOp); - return success(); + return LinalgLoops(); } - SmallVector allIvs(nLoops, ValueHandle(b.getIndexType())); - SmallVector allPIvs = - makeHandlePointers(MutableArrayRef(allIvs)); - auto loopRanges = emitLoopRanges(scope.getBuilder(), scope.getLocation(), - invertedMap, getViewSizes(b, linalgOp)); + SmallVector allIvs(nLoops, + ValueHandle(rewriter.getIndexType())); + auto loopRanges = + emitLoopRanges(scope.getBuilder(), scope.getLocation(), invertedMap, + getViewSizes(rewriter, linalgOp)); assert(loopRanges.size() == allIvs.size()); - - GenericLoopNestRangeBuilder(allPIvs, loopRanges)([&] { - SmallVector allIvValues(allIvs.begin(), allIvs.end()); - LinalgScopedEmitter::emitScalarImplementation( - allIvValues, linalgOp); - }); - return success(); + Impl::doit(linalgOp, loopRanges, allIvs); + // Number of loop ops might be different from the number of ivs since some + // loops like affine.parallel and 
loop.parallel have multiple ivs. + llvm::SetVector loopSet; + for (ValueHandle &iv : allIvs) { + if (!iv.hasValue()) + return {}; + // The induction variable is a block argument of the entry block of the + // loop operation. + BlockArgument ivVal = iv.getValue().dyn_cast(); + if (!ivVal) + return {}; + loopSet.insert(ivVal.getOwner()->getParentOp()); + } + LinalgLoops loops(loopSet.begin(), loopSet.end()); + return loops; } -template +template class LinalgRewritePattern : public RewritePattern { public: explicit LinalgRewritePattern(MLIRContext *context) @@ -599,8 +691,8 @@ public: LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - using Impl = LinalgOpToLoopsImpl; - if (failed(Impl::doit(op, rewriter))) + using Impl = LinalgOpToLoopsImpl; + if (!Impl::doit(op, rewriter)) return failure(); rewriter.eraseOp(op); return success(); @@ -608,32 +700,28 @@ public: }; // Helper classes for type list expansion. -template +template class RewritePatternList; -template -class RewritePatternList { +template +class RewritePatternList { public: static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) {} }; -template -class RewritePatternList { +template +class RewritePatternList { public: static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) { - patterns - .insert>( - ctx); - RewritePatternList::build( - patterns, ctx); + patterns.insert>(ctx); + RewritePatternList::build(patterns, ctx); } }; /// Populate the given list with patterns that convert from Linalg to LLVM. 
-template +template void FillRewritePatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) { - RewritePatternList::build(patterns, ctx); @@ -677,13 +765,13 @@ struct FoldAffineOp : public RewritePattern { }; } // namespace -template +template static void lowerLinalgToLoopsImpl(Operation *op, MLIRContext *context) { OwningRewritePatternList patterns; // Canonicalization and folding patterns applied greedily allow cleaning up // the emitted IR on the fly. // TODO(ntv) fold view and subview ops? - FillRewritePatterns(patterns, context); + FillRewritePatterns(patterns, context); DimOp::getCanonicalizationPatterns(patterns, context); AffineApplyOp::getCanonicalizationPatterns(patterns, context); patterns.insert(context); @@ -695,21 +783,18 @@ namespace { struct LowerToAffineLoops : public LinalgLowerToAffineLoopsBase { void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; struct LowerToLoops : public LinalgLowerToLoopsBase { void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; struct LowerToParallelLoops : public LinalgLowerToParallelLoopsBase { void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; } // namespace @@ -728,28 +813,38 @@ mlir::createConvertLinalgToAffineLoopsPass() { return std::make_unique(); } +/// Emits a loop nest with the proper body for `op`. +template +Optional +mlir::linalg::linalgLowerOpToLoops(PatternRewriter &rewriter, Operation *op) { + return LinalgOpToLoopsImpl::doit(op, rewriter); +} + /// Emits a loop nest of `loop.for` with the proper body for `op`. 
template LogicalResult mlir::linalg::linalgOpToLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit( - op, rewriter); + Optional loops = + linalgLowerOpToLoops(rewriter, op); + return loops ? success() : failure(); } /// Emits a loop nest of `affine.for` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToAffineLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit( - op, rewriter); + Optional loops = + linalgLowerOpToLoops(rewriter, op); + return loops ? success() : failure(); } /// Emits a loop nest of `loop.parallel` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit(op, rewriter); + Optional loops = + linalgLowerOpToLoops(rewriter, op); + return loops ? success() : failure(); } // TODO(ntv) Need to make these instantiations more future-proof to avoid the @@ -758,7 +853,12 @@ LogicalResult mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, template LogicalResult mlir::linalg::linalgOpToLoops( \ PatternRewriter & rewriter, Operation * op); \ template LogicalResult mlir::linalg::linalgOpToAffineLoops( \ - PatternRewriter & rewriter, Operation * op); + PatternRewriter & rewriter, Operation * op); \ + template LogicalResult mlir::linalg::linalgOpToParallelLoops( \ + PatternRewriter & rewriter, Operation * op); \ + template Optional \ + mlir::linalg::linalgLowerOpToLoops( \ + PatternRewriter & rewriter, Operation * op); INSTANTIATE_LINALG_OP_TO_LOOPS(CopyOp) INSTANTIATE_LINALG_OP_TO_LOOPS(FillOp) @@ -771,9 +871,3 @@ INSTANTIATE_LINALG_OP_TO_LOOPS(PoolingMinOp) INSTANTIATE_LINALG_OP_TO_LOOPS(PoolingSumOp) INSTANTIATE_LINALG_OP_TO_LOOPS(GenericOp) INSTANTIATE_LINALG_OP_TO_LOOPS(IndexedGenericOp) - -// TODO(pifon): Enable lowering to parallel loops for ops other than -// linalg.generic for now to be on the safe side. 
-template LogicalResult -mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, - Operation *op); diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir index 1bd0cf61dd24..a4d3acd91c38 100644 --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -1,18 +1,30 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s +// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck --check-prefix=CHECKLOOP %s +// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s // Test that we can lower all the way to LLVM without crashing, don't check results here. // RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1 -// CHECK-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECK-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> -// CHECK-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> +// CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECKLOOP-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECKLOOP-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> +// CHECKLOOP-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> -// CHECK-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> -// CHECK-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> -// CHECK-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: 
#[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECKLOOP-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECKLOOP-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> +// CHECKLOOP-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> + +// CHECKPARALLEL-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECKPARALLEL-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECKPARALLEL-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECKPARALLEL-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> +// CHECKPARALLEL-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> + +// CHECKPARALLEL-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECKPARALLEL-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECKPARALLEL-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> +// CHECKPARALLEL-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { @@ -24,22 +36,40 @@ func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { linalg.matmul(%A, %B, %C) : memref, memref, memref return } -// CHECK-LABEL: func @matmul(%{{.*}}: memref, -// CHECK-SAME: [[M:arg[0-9]+]]: index -// CHECK-SAME: [[N:arg[0-9]+]]: index -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// 
CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @matmul(%{{.*}}: memref, +// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[N:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: 
%[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref + + func @matvec(%arg0: memref, %M: index, %N: index) { %c0 = constant 0 : index @@ -50,20 +80,36 @@ func @matvec(%arg0: memref, %M: index, %N: index) { linalg.matvec(%2, %3, %4) : memref, memref, memref return } -// CHECK-LABEL: func @matvec(%{{.*}}: memref, -// CHECK-SAME: [[M:arg[0-9]+]]: index -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref +// CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref, +// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] 
: f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref + func @dot(%arg0: memref, %M: index) { %c0 = constant 0 : index @@ -74,78 +120,126 @@ func @dot(%arg0: memref, %M: index) { linalg.dot(%1, %2, %3) : memref, memref, memref return } -// CHECK-LABEL: func @dot(%{{.*}}: memref, -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][] : memref +// CHECKLOOP-LABEL: func 
@dot(%{{.*}}: memref, +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][] : memref + +// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][] : memref + func @dot_view(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.dot(%arg0, %arg1, %arg2) : memref, memref, memref return } -// CHECK-LABEL: func @dot_view( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[K:.*]] = dim %arg0, 0 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: 
%[[c:.*]] = load %{{.*}}[] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @dot_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[K:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @dot_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref func @fill_view(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view( -// CHECK: %{{.*}}: memref, %{{.*}}: f32) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-LABEL: func @fill_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @fill_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// 
CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref func @fill_view0(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { -// CHECK: store %{{.*}}, %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @fill_view3(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view3( -// CHECK: %{{.*}}: memref, %{{.*}}: f32) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @fill_view3( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @fill_view3( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @copy_view(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECK-LABEL: func @copy_view( -// CHECK: %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref +// 
CHECKLOOP-LABEL: func @copy_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @copy_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref func @copy_view0(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECK-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %{{.*}} = load %{{.*}}[] : memref -// CHECK: store %{{.*}}, %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %{{.*}} = load %{{.*}}[] : memref +// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[] : memref +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @copy_view3(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, @@ -153,66 +247,113 @@ func @copy_view3(%arg0: memref, %arg1: memref, memref return } -// CHECK-LABEL: func @copy_view3 -// CHECK: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @copy_view3 +// CHECKLOOP: (%{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: 
loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @copy_view3 +// CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref, memref, memref return } -// CHECK-LABEL: func @conv_view3( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 1 : memref -// CHECK: %[[K:.*]] = dim %arg0, 2 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECK: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_view3( +// CHECKLOOP: %{{.*}}: 
memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_view3( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load 
%{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view4(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref, memref, memref return } -// CHECK-LABEL: func @conv_view4( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECK: %[[K:.*]] = dim %arg0, 3 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECK: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECK: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_view4( +// CHECKLOOP: %{{.*}}: memref, 
%{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_view4( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, 
%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + func @conv_padding(%arg0: memref, %arg1: memref, @@ -223,34 +364,60 @@ func @conv_padding(%arg0: memref, memref, memref, memref return } -// CHECK-LABEL: func @conv_padding -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECK: %[[K:.*]] = dim %arg0, 3 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// 
CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECK: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) -// CHECK: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_padding +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) +// 
CHECKLOOP: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_padding +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: 
%{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref func @pooling_max(%arg0: memref, %arg1: memref, @@ -259,21 +426,36 @@ func @pooling_max(%arg0: memref, memref, memref, memref return } -// CHECK-LABEL: func @pooling_max -// CHECK: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECK: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_max +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = 
affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_max +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @pooling_min(%arg0: memref, %arg1: memref, @@ -282,21 +464,36 @@ func @pooling_min(%arg0: memref, memref, memref, memref return } -// CHECK-LABEL: func @pooling_min -// CHECK: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: 
%[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECK: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_min +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_min +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, 
%{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @pooling_sum(%arg0: memref, %arg1: memref, @@ -305,21 +502,36 @@ func @pooling_sum(%arg0: memref, memref, memref, memref return } -// CHECK-LABEL: func @pooling_sum -// CHECK: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECK: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_sum +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: 
%[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_sum +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @foo(%0: f32, %1: f32, %2: f32) -> (f32, f32) { %f0 = constant 0.0 : f32 @@ -344,17 +556,27 @@ func @generic_function(%arg0: memref, %arg1 memref, memref, memref return } -// CHECK-LABEL: @foo -// CHECK-LABEL: @generic_function -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) -// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @foo +// CHECKLOOP-LABEL: 
@generic_function +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) +// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @foo +// CHECKPARALLEL-LABEL: @generic_function +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) +// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref #trait2 = { args_in = 1, @@ -373,17 +595,27 @@ func @generic_region(%arg0: memref, %arg1: }: memref, memref, memref return } -// CHECK-LABEL: @generic_region -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32 -// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @generic_region 
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32 +// CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @generic_region +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL: %[[e:.*]] = addf %[[c]], %[[d]] : f32 +// CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref func @indexed_foo(%i: index, %j: index, %k: index, %0: f32, %1: f32, %2: f32) -> (f32, f32) { %i_int = index_cast %i: index to i32 @@ -409,17 +641,27 @@ func @indexed_generic_function( memref return } -// CHECK-LABEL: @indexed_foo -// CHECK-LABEL: @indexed_generic_function -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) -// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], 
%[[j]], %[[k]]] : memref -// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @indexed_foo +// CHECKLOOP-LABEL: @indexed_generic_function +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) +// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @indexed_foo +// CHECKPARALLEL-LABEL: @indexed_generic_function +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) +// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref #trait4 = { args_in = 1, @@ -450,21 +692,35 @@ func @indexed_generic_region( return } -// CHECK-LABEL: @indexed_generic_region -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] -// CHECK: %[[result_1:.*]] = mulf %[[a]], 
%[[b]] : f32 -// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index -// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 -// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 -// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 -// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKLOOP-LABEL: @indexed_generic_region +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index +// CHECKLOOP: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 +// CHECKLOOP: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 +// CHECKLOOP: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 +// CHECKLOOP: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] + +// CHECKPARALLEL-LABEL: @indexed_generic_region +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKPARALLEL: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKPARALLEL: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index +// CHECKPARALLEL: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 +// CHECKPARALLEL: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 +// 
CHECKPARALLEL: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 +// CHECKPARALLEL: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKPARALLEL: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] // ----- @@ -490,13 +746,20 @@ func @generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xf32>) return } -// CHECK-LABEL: @generic_op_zero_rank -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %[[ARG0]][] -// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] +// CHECKLOOP-LABEL: @generic_op_zero_rank +// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][] +// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] + +// CHECKPARALLEL-LABEL: @generic_op_zero_rank +// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][] +// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] func @indexed_generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xi32>) { @@ -510,16 +773,26 @@ func @indexed_generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xi32>) return } -// CHECK-LABEL: @indexed_generic_op_zero_rank -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %[[ARG0]][ -// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECK: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 -// CHECK: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 -// CHECK: store %[[result]], %[[ARG1]][%[[i]], 
%[[j]]] +// CHECKLOOP-LABEL: @indexed_generic_op_zero_rank +// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][ +// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 +// CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 +// CHECKLOOP: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] + +// CHECKPARALLEL-LABEL: @indexed_generic_op_zero_rank +// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][ +// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 +// CHECKPARALLEL: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 +// CHECKPARALLEL: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] #reduce_1D_access = [ affine_map<(i) -> (i)>, @@ -543,14 +816,23 @@ func @generic_op_1D_reduce(%arg0: memref, %arg1: memref) } : memref, memref return } -// CHECK-LABEL: @generic_op_1D_reduce -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECK: %[[b:.*]] = load %[[ARG1]][] -// CHECK: %[[c:.*]] = addf %[[a]], %[[b]] : f32 -// CHECK: store %[[c]], %[[ARG1]][] +// CHECKLOOP-LABEL: @generic_op_1D_reduce +// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] +// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] +// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32 +// CHECKLOOP: store %[[c]], %[[ARG1]][] + +// 
CHECKPARALLEL-LABEL: @generic_op_1D_reduce +// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}} +// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] +// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] +// CHECKPARALLEL: %[[c:.*]] = addf %[[a]], %[[b]] : f32 +// CHECKPARALLEL: store %[[c]], %[[ARG1]][] #reduce_init_1D_access = [ @@ -581,17 +863,29 @@ func @indexed_generic_op_1D_reduce(%arg0: memref, } : memref, memref, memref return } -// CHECK-LABEL: @indexed_generic_op_1D_reduce -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECK: %[[b:.*]] = load %[[ARG1]][] -// CHECK: %[[c:.*]] = load %[[ARG2]][] -// CHECK: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] -// CHECK: %[[e:.*]] = addf %[[a]], %[[d]] -// CHECK: store %[[e]], %[[ARG2]][] +// CHECKLOOP-LABEL: @indexed_generic_op_1D_reduce +// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] +// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] +// CHECKLOOP: %[[c:.*]] = load %[[ARG2]][] +// CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] +// CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]] +// CHECKLOOP: store %[[e]], %[[ARG2]][] + +// CHECKPARALLEL-LABEL: @indexed_generic_op_1D_reduce +// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref +// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}} +// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] +// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] +// CHECKPARALLEL: %[[c:.*]] = load %[[ARG2]][] +// 
CHECKPARALLEL: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] +// CHECKPARALLEL: %[[e:.*]] = addf %[[a]], %[[d]] +// CHECKPARALLEL: store %[[e]], %[[ARG2]][] #trait_const_fill = { args_in = 0, @@ -601,15 +895,21 @@ func @indexed_generic_op_1D_reduce(%arg0: memref, library_call = "some_external_fn" } func @generic_const_init(%arg0: memref) { - %cst = constant 1.0 : f32 + %cst = constant 1.0 : f32 linalg.generic #trait_const_fill %arg0 { ^bb0(%arg1: f32): // no predecessors linalg.yield %cst : f32 }: memref return } -// CHECK-LABEL: @generic_const_init -// CHECK-SAME: %[[ARG0:.*]]: memref -// CHECK: %[[CONST:.*]] = constant 1.000000e+00 : f32 -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: store %[[CONST]], %[[ARG0]] +// CHECKLOOP-LABEL: @generic_const_init +// CHECKLOOP-SAME: %[[ARG0:.*]]: memref +// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32 +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: store %[[CONST]], %[[ARG0]] + +// CHECKPARALLEL-LABEL: @generic_const_init +// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref +// CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32 +// CHECKPARALLEL: loop.parallel (%[[i:.*]]) +// CHECKPARALLEL: store %[[CONST]], %[[ARG0]] diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir index c379fbc55b5e..1c7aee614b8b 100644 --- a/mlir/test/Dialect/Linalg/parallel_loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir @@ -32,22 +32,32 @@ func @linalg_generic_sum(%lhs: memref<2x2xf32>, // ----- #accesses = [ - affine_map<(m, n) -> (m, n)>, - affine_map<(m, n) -> (m)> + affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, + affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> ] #trait = { args_in = 1, args_out = 1, - iterator_types = ["parallel", "reduction"], + iterator_types = ["parallel", "parallel", "reduction", "parallel"], indexing_maps = #accesses } -func @do_not_lower_reduce(%A: memref<2x4xf32>, %B: memref<2xf32>) { +func @lower_outer_parallel(%A: memref, %B: memref) 
{ linalg.generic #trait %A, %B { ^bb0(%a: f32, %b: f32): linalg.yield %a: f32 - } : memref<2x4xf32>, memref<2xf32> + } : memref, memref return } -// CHECK-LABEL: @do_not_lower_reduce -// CHECK: linalg.generic +// CHECK-LABEL: @lower_outer_parallel +// CHECK-DAG: %[[C0:.*]] = constant 0 +// CHECK-DAG: %[[C1:.*]] = constant 1 +// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, 0 +// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1 +// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2 +// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3 +// CHECK: loop.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]]) +// CHECK: loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]] +// CHECK: loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]] +// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]] \ No newline at end of file