forked from OSchip/llvm-project
[mlir][vector] Improve vector distribute integration test and fix block distribution
Fix semantic in the distribute integration test based on offline feedback. This exposed a bug in block distribution, we need to make sure the id is multiplied by the stride of the vector. Fix the transformation and unit test. Differential Revision: https://reviews.llvm.org/D89291
This commit is contained in:
parent
74b078294f
commit
5d45f758f0
|
@ -1,9 +1,18 @@
|
|||
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \
|
||||
// RUN: -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
|
||||
// RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
|
||||
// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
|
||||
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
|
||||
// RUN: -convert-scf-to-std -convert-vector-to-llvm | mlir-cpu-runner -e main \
|
||||
// RUN: -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// RUN: mlir-opt %s -test-vector-to-forloop | FileCheck %s -check-prefix=TRANSFORM
|
||||
|
||||
|
||||
func @print_memref_f32(memref<*xf32>)
|
||||
|
||||
func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
|
||||
|
@ -19,30 +28,29 @@ func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
|
|||
return %0 : memref<?xf32>
|
||||
}
|
||||
|
||||
func @vector_add_cycle(%id : index, %A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
|
||||
%c0 = constant 0 : index
|
||||
%cf0 = constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%b = vector.transfer_read %B[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%acc = addf %a, %b: vector<64xf32>
|
||||
vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<?xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// Loop over a function containinng a large add vector and distribute it so that
|
||||
// each iteration of the loop process part of the vector operation.
|
||||
// Large vector addf that can be broken down into a loop of smaller vector addf.
|
||||
func @main() {
|
||||
%cf0 = constant 0.0 : f32
|
||||
%cf1 = constant 1.0 : f32
|
||||
%cf2 = constant 2.0 : f32
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%c32 = constant 32 : index
|
||||
%c64 = constant 64 : index
|
||||
%out = alloc(%c64) : memref<?xf32>
|
||||
%in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
|
||||
%in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
|
||||
scf.for %arg5 = %c0 to %c64 step %c1 {
|
||||
call @vector_add_cycle(%arg5, %in1, %in2, %out) : (index, memref<?xf32>, memref<?xf32>, memref<?xf32>) -> ()
|
||||
}
|
||||
// Check that the tansformatio correctly happened.
|
||||
// TRANSFORM: scf.for
|
||||
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
|
||||
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
|
||||
// TRANSFORM: %{{.*}} = addf %{{.*}}, %{{.*}} : vector<2xf32>
|
||||
// TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
|
||||
// TRANSFORM: }
|
||||
%a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%acc = addf %a, %b: vector<64xf32>
|
||||
vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
|
||||
%converted = memref_cast %out : memref<?xf32> to memref<*xf32>
|
||||
call @print_memref_f32(%converted): (memref<*xf32>) -> ()
|
||||
// CHECK: Unranked{{.*}}data =
|
||||
|
|
|
@ -2526,9 +2526,13 @@ struct TransferReadExtractPattern
|
|||
return failure();
|
||||
edsc::ScopedContext scope(rewriter, read.getLoc());
|
||||
using mlir::edsc::op::operator+;
|
||||
using mlir::edsc::op::operator*;
|
||||
using namespace mlir::edsc::intrinsics;
|
||||
SmallVector<Value, 4> indices(read.indices().begin(), read.indices().end());
|
||||
indices.back() = indices.back() + extract.id();
|
||||
indices.back() =
|
||||
indices.back() +
|
||||
(extract.id() *
|
||||
std_constant_index(extract.getResultType().getDimSize(0)));
|
||||
Value newRead = vector_transfer_read(extract.getType(), read.memref(),
|
||||
indices, read.permutation_map(),
|
||||
read.padding(), ArrayAttr());
|
||||
|
@ -2552,10 +2556,14 @@ struct TransferWriteInsertPattern
|
|||
return failure();
|
||||
edsc::ScopedContext scope(rewriter, write.getLoc());
|
||||
using mlir::edsc::op::operator+;
|
||||
using mlir::edsc::op::operator*;
|
||||
using namespace mlir::edsc::intrinsics;
|
||||
SmallVector<Value, 4> indices(write.indices().begin(),
|
||||
write.indices().end());
|
||||
indices.back() = indices.back() + insert.id();
|
||||
indices.back() =
|
||||
indices.back() +
|
||||
(insert.id() *
|
||||
std_constant_index(insert.getSourceVectorType().getDimSize(0)));
|
||||
vector_transfer_write(insert.vector(), write.memref(), indices,
|
||||
write.permutation_map(), ArrayAttr());
|
||||
rewriter.eraseOp(write);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 | FileCheck %s
|
||||
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 -split-input-file | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @distribute_vector_add
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
|
@ -13,6 +13,8 @@ func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>)
|
|||
return %0: vector<32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @vector_add_read_write
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
|
||||
|
@ -34,12 +36,19 @@ func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>,
|
|||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @vector_add_cycle
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[MAP0:map[0-9]+]] = affine_map<()[s0] -> (s0 * 2)>
|
||||
|
||||
// CHECK: func @vector_add_cycle
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<2xf32>
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID]]] : vector<2xf32>, memref<64xf32>
|
||||
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] : vector<2xf32>, memref<64xf32>
|
||||
// CHECK-NEXT: return
|
||||
func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
|
||||
%c0 = constant 0 : index
|
||||
|
@ -51,6 +60,8 @@ func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C:
|
|||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test to make sure nothing is done in case the vector size is not a
|
||||
// multiple of multiplicity.
|
||||
// CHECK-LABEL: func @vector_negative_test
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <type_traits>
|
||||
|
||||
#include "mlir/Analysis/SliceAnalysis.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
|
@ -185,6 +186,64 @@ struct TestVectorDistributePatterns
|
|||
}
|
||||
};
|
||||
|
||||
struct TestVectorToLoopPatterns
|
||||
: public PassWrapper<TestVectorToLoopPatterns, FunctionPass> {
|
||||
TestVectorToLoopPatterns() = default;
|
||||
TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {}
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
registry.insert<VectorDialect>();
|
||||
registry.insert<AffineDialect>();
|
||||
}
|
||||
Option<int32_t> multiplicity{
|
||||
*this, "distribution-multiplicity",
|
||||
llvm::cl::desc("Set the multiplicity used for distributing vector"),
|
||||
llvm::cl::init(32)};
|
||||
void runOnFunction() override {
|
||||
MLIRContext *ctx = &getContext();
|
||||
OwningRewritePatternList patterns;
|
||||
FuncOp func = getFunction();
|
||||
func.walk([&](AddFOp op) {
|
||||
// Check that the operation type can be broken down into a loop.
|
||||
VectorType type = op.getType().dyn_cast<VectorType>();
|
||||
if (!type || type.getRank() != 1 ||
|
||||
type.getNumElements() % multiplicity != 0)
|
||||
return mlir::WalkResult::advance();
|
||||
auto filterAlloc = [](Operation *op) {
|
||||
if (isa<ConstantOp, AllocOp, CallOp>(op))
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
auto dependentOps = getSlice(op, filterAlloc);
|
||||
// Create a loop and move instructions from the Op slice into the loop.
|
||||
OpBuilder builder(op);
|
||||
auto zero = builder.create<ConstantOp>(
|
||||
op.getLoc(), builder.getIndexType(),
|
||||
builder.getIntegerAttr(builder.getIndexType(), 0));
|
||||
auto one = builder.create<ConstantOp>(
|
||||
op.getLoc(), builder.getIndexType(),
|
||||
builder.getIntegerAttr(builder.getIndexType(), 1));
|
||||
auto numIter = builder.create<ConstantOp>(
|
||||
op.getLoc(), builder.getIndexType(),
|
||||
builder.getIntegerAttr(builder.getIndexType(), multiplicity));
|
||||
auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
|
||||
for (Operation *it : dependentOps) {
|
||||
it->moveBefore(forOp.getBody()->getTerminator());
|
||||
}
|
||||
// break up the original op and let the patterns propagate.
|
||||
Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
|
||||
builder, op.getOperation(), forOp.getInductionVar(), multiplicity);
|
||||
if (ops.hasValue()) {
|
||||
SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
|
||||
op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
|
||||
}
|
||||
return mlir::WalkResult::interrupt();
|
||||
});
|
||||
patterns.insert<PointwiseExtractPattern>(ctx);
|
||||
populateVectorToVectorTransformationPatterns(patterns, ctx);
|
||||
applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
|
||||
}
|
||||
};
|
||||
|
||||
struct TestVectorTransferUnrollingPatterns
|
||||
: public PassWrapper<TestVectorTransferUnrollingPatterns, FunctionPass> {
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
|
@ -264,5 +323,8 @@ void registerTestVectorConversions() {
|
|||
"test-vector-distribute-patterns",
|
||||
"Test conversion patterns to distribute vector ops in the vector "
|
||||
"dialect");
|
||||
PassRegistration<TestVectorToLoopPatterns> vectorToForLoop(
|
||||
"test-vector-to-forloop",
|
||||
"Test conversion patterns to break up a vector op into a for loop");
|
||||
}
|
||||
} // namespace mlir
|
||||
|
|
Loading…
Reference in New Issue