//===- LoopAnalysis.cpp - Misc loop analysis routines ---------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/MLFunctionMatcher.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Statements.h"
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Support/Functional.h"
#include "mlir/Support/MathExtras.h"

using namespace mlir;

/// Returns the trip count of the loop as an affine expression if it is
/// expressible as one, and nullptr otherwise. The trip count expression is
/// simplified before returning.
AffineExpr mlir::getTripCountExpr(const ForStmt &forStmt) {
  // upper_bound - lower_bound
  int64_t loopSpan;

  int64_t step = forStmt.getStep();
  auto *context = forStmt.getContext();

  if (forStmt.hasConstantBounds()) {
    int64_t lb = forStmt.getConstantLowerBound();
    int64_t ub = forStmt.getConstantUpperBound();
    loopSpan = ub - lb;
  } else {
    auto lbMap = forStmt.getLowerBoundMap();
    auto ubMap = forStmt.getUpperBoundMap();
    // TODO(bondhugula): handle max/min of multiple expressions.
    if (lbMap.getNumResults() != 1 || ubMap.getNumResults() != 1)
      return nullptr;

    // TODO(bondhugula): handle bounds with different operands.
    // Bounds have different operands, unhandled for now.
    if (!forStmt.matchingBoundOperandList())
      return nullptr;

    // ub_expr - lb_expr
    AffineExpr lbExpr(lbMap.getResult(0));
    AffineExpr ubExpr(ubMap.getResult(0));
    auto loopSpanExpr = simplifyAffineExpr(
        ubExpr - lbExpr, std::max(lbMap.getNumDims(), ubMap.getNumDims()),
        std::max(lbMap.getNumSymbols(), ubMap.getNumSymbols()));
    auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>();
    if (!cExpr)
      return loopSpanExpr.ceilDiv(step);
    loopSpan = cExpr.getValue();
  }

  // 0 iteration loops.
  if (loopSpan < 0)
    return 0;

  return getAffineConstantExpr(static_cast<uint64_t>(ceilDiv(loopSpan, step)),
                               context);
}
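
// Illustrative sketch (not exercised by this file): for a loop with constant
// bounds such as
//   for %i = 0 to 128 step 8
// the span is 128 and the returned expression is the constant 16. For
// single-result, non-constant bounds such as
//   for %i = 5 to %N step 3
// the result is the simplified form of (ub_expr - lb_expr) ceildiv step, i.e.
// roughly (%N - 5) ceildiv 3 (affine loop upper bounds are exclusive).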

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using getTripCountExpr) and
/// is able to determine a constant trip count in non-trivial cases.
llvm::Optional<uint64_t> mlir::getConstantTripCount(const ForStmt &forStmt) {
  auto tripCountExpr = getTripCountExpr(forStmt);

  if (!tripCountExpr)
    return None;

  if (auto constExpr = tripCountExpr.dyn_cast<AffineConstantExpr>())
    return constExpr.getValue();

  return None;
}
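
// Illustrative usage sketch (the caller and names below are hypothetical, not
// defined in this file): a pass that only fully unrolls loops with a
// statically known trip count might be gated as
//
//   if (auto tripCount = getConstantTripCount(forStmt))
//     if (*tripCount <= kFullUnrollLimit) // hypothetical threshold
//       fullyUnrollLoop(forStmt);         // hypothetical helper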

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCountExpr), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(const ForStmt &forStmt) {
  auto tripCountExpr = getTripCountExpr(forStmt);

  if (!tripCountExpr)
    return 1;

  if (auto constExpr = tripCountExpr.dyn_cast<AffineConstantExpr>()) {
    uint64_t tripCount = constExpr.getValue();

    // 0 iteration loops (greatest divisor is 2^64 - 1).
    if (tripCount == 0)
      return ULONG_MAX;

    // The greatest divisor is the trip count.
    return tripCount;
  }

  // Trip count is not a known constant; return its largest known divisor.
  return tripCountExpr.getLargestKnownDivisor();
}
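
// Illustrative sketch: 'for %i = 0 to 64 step 4' has the constant trip count
// 16, so 16 is returned; a loop whose trip count is known to be 0 reports
// ULONG_MAX (every value divides zero); and a non-constant trip count falls
// back to the largest known divisor of its affine expression (e.g. an
// expression of the form 32 * %N has largest known divisor 32).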

bool mlir::isAccessInvariant(const MLValue &input, MemRefType memRefType,
                             ArrayRef<const MLValue *> indices, unsigned dim) {
  assert(indices.size() == memRefType.getRank());
  assert(dim < indices.size());
  auto layoutMap = memRefType.getAffineMaps();
  assert(memRefType.getAffineMaps().size() <= 1);
  // TODO(ntv): remove dependence on Builder once we support non-identity
  // layout map.
  Builder b(memRefType.getContext());
  assert(layoutMap.empty() ||
         layoutMap[0] == b.getMultiDimIdentityMap(indices.size()));
  (void)layoutMap;

  SmallVector<OperationStmt *, 4> affineApplyOps;
  getReachableAffineApplyOps({const_cast<MLValue *>(indices[dim])},
                             affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of MLValue pointer semantics.
    return indices[dim] != &input;
  }

  assert(affineApplyOps.size() == 1 &&
         "CompositionAffineMapsPass must have "
         "been run: there should be at most one AffineApplyOp");
  auto composeOp = affineApplyOps[0]->cast<AffineApplyOp>();
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  unsigned idx = std::numeric_limits<unsigned>::max();
  unsigned numResults = composeOp->getNumResults();
  for (unsigned i = 0; i < numResults; ++i) {
    if (indices[dim] == composeOp->getResult(i)) {
      idx = i;
      break;
    }
  }
  assert(idx < std::numeric_limits<unsigned>::max());
  return !AffineValueMap(*composeOp)
              .isFunctionOf(idx, &const_cast<MLValue &>(input));
}
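
// Illustrative sketch: in
//   for %i = 0 to %M {
//     for %j = 0 to %N {
//       %a = affine_apply (d0) -> (d0 + 1)(%j)
//       %v = load %A[%i, %a] : memref<?x?xf32>
//     }
//   }
// the access is invariant with respect to %j along dim 0 (%i does not depend
// on %j and there is no affine_apply to chase), but not along dim 1, since %a
// is a function of %j through the reachable affine_apply.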

/// Determines whether a load or a store has a contiguous access along the
/// value `input`. Contiguous is defined as either invariant or varying only
/// along the fastest varying memory dimension.
// TODO(ntv): allow more advanced notions of contiguity (non-fastest varying,
// check strides, ...).
template <typename LoadOrStoreOpPointer>
static bool isContiguousAccess(const MLValue &input,
                               LoadOrStoreOpPointer memoryOp,
                               unsigned fastestVaryingDim) {
  using namespace functional;
  auto indices = map([](const SSAValue *val) { return dyn_cast<MLValue>(val); },
                     memoryOp->getIndices());
  auto memRefType = memoryOp->getMemRefType();
  for (unsigned d = 0, numIndices = indices.size(); d < numIndices; ++d) {
    if (fastestVaryingDim == (numIndices - 1) - d) {
      continue;
    }
    if (!isAccessInvariant(input, memRefType, indices, d)) {
      return false;
    }
  }
  return true;
}
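
// Illustrative sketch: for a load %v = load %A[%i, %a] : memref<?x?xf32>
// where %a is an affine_apply of the loop %j only, the access is contiguous
// along %j for fastestVaryingDim == 0: the trailing (fastest varying) index is
// the only one allowed to vary with %j, and the remaining index %i is
// invariant. If %j instead fed a non-trailing index, the check would fail.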

template <typename LoadOrStoreOpPointer>
static bool isVectorElement(LoadOrStoreOpPointer memoryOp) {
  auto memRefType = memoryOp->getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableStmtFun =
    std::function<bool(const ForStmt &, const OperationStmt &)>;

static bool isVectorizableLoopWithCond(const ForStmt &loop,
                                       VectorizableStmtFun isVectorizableStmt) {
  if (!matcher::isParallelLoop(loop) && !matcher::isReductionLoop(loop)) {
    return false;
  }

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  auto *forStmt = const_cast<ForStmt *>(&loop);
  auto conditionalsMatched = conditionals.match(forStmt);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  auto loadAndStoresMatched = loadAndStores.match(forStmt);
  for (auto ls : loadAndStoresMatched) {
    auto *op = cast<OperationStmt>(ls.first);
    auto load = op->dyn_cast<LoadOp>();
    auto store = op->dyn_cast<StoreOp>();
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO(ntv): ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (!isVectorizableStmt(loop, *op)) {
      return false;
    }
  }
  return true;
}
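
// Illustrative sketch: a loop whose body contains an if statement, or a
// load/store whose memref has a vector element type (e.g.
// memref<?xvector<128xf32>>), is rejected above regardless of the
// per-statement predicate.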

bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
    const ForStmt &loop, unsigned fastestVaryingDim) {
  VectorizableStmtFun fun(
      [fastestVaryingDim](const ForStmt &loop, const OperationStmt &op) {
        auto load = op.dyn_cast<LoadOp>();
        auto store = op.dyn_cast<StoreOp>();
        return load ? isContiguousAccess(loop, load, fastestVaryingDim)
                    : isContiguousAccess(loop, store, fastestVaryingDim);
      });
  return isVectorizableLoopWithCond(loop, fun);
}
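
// Illustrative usage sketch (hypothetical caller): a vectorization pass
// targeting the innermost memref dimension might gate its rewrite as
//
//   if (isVectorizableLoopAlongFastestVaryingMemRefDim(
//           *loop, /*fastestVaryingDim=*/0))
//     vectorizeLoop(*loop, vectorSize); // hypothetical helper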

bool mlir::isVectorizableLoop(const ForStmt &loop) {
  VectorizableStmtFun fun(
      // TODO: implement me
      [](const ForStmt &loop, const OperationStmt &op) { return true; });
  return isVectorizableLoopWithCond(loop, fun);
}

/// Checks whether SSA dominance would be violated if a for stmt's body
/// statements are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence
// violation when we have the support.
bool mlir::isStmtwiseShiftValid(const ForStmt &forStmt,
                                ArrayRef<uint64_t> shifts) {
  assert(shifts.size() == forStmt.getStatements().size());
  unsigned s = 0;
  for (const auto &stmt : forStmt) {
    // A for or if stmt does not produce any def/results (that are used
    // outside).
    if (const auto *opStmt = dyn_cast<OperationStmt>(&stmt)) {
      for (unsigned i = 0, e = opStmt->getNumResults(); i < e; ++i) {
        const MLValue *result = opStmt->getResult(i);
        for (const StmtOperand &use : result->getUses()) {
          // If an ancestor statement doesn't lie in the block of forStmt,
          // there is no shift to check.
          // This is a naive way; if performance becomes an issue, a map can be
          // used to store 'shifts' for constant-time lookup of a statement's
          // shift.
          if (auto *ancStmt = forStmt.findAncestorStmtInBlock(*use.getOwner()))
            if (shifts[s] != shifts[forStmt.findStmtPosInBlock(*ancStmt)])
              return false;
        }
      }
    }
    s++;
  }
  return true;
}
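
// Illustrative sketch: for a two-statement body
//   %v = "produce"() : () -> f32
//   "consume"(%v) : (f32) -> ()
// the shift vector {0, 0} (or any vector giving def and use the same shift) is
// valid, whereas {0, 1} is rejected because the use of %v would be shifted
// relative to its def.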