forked from OSchip/llvm-project
[mlir][Vector] Add vector to outerproduct lowering for the [reduction, parallel] case.
Differential Revision: https://reviews.llvm.org/D105373
This commit is contained in:
parent
8b81524c6d
commit
14c1450d5c
|
@ -1324,15 +1324,14 @@ LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite(
|
|||
VectorType lhsType = op.getLhsType();
|
||||
Value lhs = op.lhs(), rhs = op.rhs(), res = op.acc();
|
||||
|
||||
// Set up the parallel/reduction structure in right form.
|
||||
AffineExpr m, n, k;
|
||||
bindDims(rewriter.getContext(), m, n, k);
|
||||
|
||||
//
|
||||
// Two outer parallel, one inner reduction (matmat flavor).
|
||||
//
|
||||
UnrolledOuterProductEmitter e(rewriter, op);
|
||||
if (e.iters({Par(), Par(), Red()})) {
|
||||
// Set up the parallel/reduction structure in right form.
|
||||
AffineExpr m, n, k;
|
||||
bindDims(rewriter.getContext(), m, n, k);
|
||||
// Classical row-major matmul: Just permute the lhs.
|
||||
if (e.layout({{m, k}, {k, n}, {m, n}}))
|
||||
return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
|
||||
|
@ -1367,17 +1366,42 @@ LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite(
|
|||
// One outer parallel, one inner reduction (matvec flavor).
|
||||
//
|
||||
if (e.iters({Par(), Red()})) {
|
||||
AffineExpr m, k;
|
||||
bindDims(rewriter.getContext(), m, k);
|
||||
|
||||
// Case mat-vec: transpose.
|
||||
if (e.layout({{m, n}, {n}, {m}}))
|
||||
if (e.layout({{m, k}, {k}, {m}}))
|
||||
return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
|
||||
// Case mat-trans-vec: ready to go.
|
||||
if (e.layout({{n, m}, {n}, {m}}))
|
||||
if (e.layout({{k, m}, {k}, {m}}))
|
||||
return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0));
|
||||
// Case vec-mat: swap and transpose.
|
||||
if (e.layout({{n}, {m, n}, {m}}))
|
||||
if (e.layout({{k}, {m, k}, {m}}))
|
||||
return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0));
|
||||
// Case vec-mat-trans: swap and ready to go.
|
||||
if (e.layout({{n}, {n, m}, {m}}))
|
||||
if (e.layout({{k}, {k, m}, {m}}))
|
||||
return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0));
|
||||
return failure();
|
||||
}
|
||||
|
||||
//
|
||||
// One outer reduction, one inner parallel (tmatvec flavor).
|
||||
//
|
||||
if (e.iters({Red(), Par()})) {
|
||||
AffineExpr k, m;
|
||||
bindDims(rewriter.getContext(), k, m);
|
||||
|
||||
// Case mat-vec: transpose.
|
||||
if (e.layout({{m, k}, {k}, {m}}))
|
||||
return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
|
||||
// Case mat-trans-vec: ready to go.
|
||||
if (e.layout({{k, m}, {k}, {m}}))
|
||||
return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0));
|
||||
// Case vec-mat: swap and transpose.
|
||||
if (e.layout({{k}, {m, k}, {m}}))
|
||||
return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0));
|
||||
// Case vec-mat-trans: swap and ready to go.
|
||||
if (e.layout({{k}, {k, m}, {m}}))
|
||||
return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0));
|
||||
return failure();
|
||||
}
|
||||
|
|
|
@ -45,6 +45,16 @@
|
|||
iterator_types = ["parallel", "reduction"]
|
||||
}
|
||||
|
||||
// Contraction trait with a leading reduction dimension and a trailing parallel
// dimension. The three maps give the indexing of the lhs, rhs and accumulator
// operands respectively: lhs is indexed by (i), rhs by (i, j), and the
// accumulator/result by (j). With the iterator types listed in the trait, i is
// the reduction dimension and j is the parallel dimension.
#redpar_vecmattrans_accesses = [
|
||||
affine_map<(i, j) -> (i)>,
|
||||
affine_map<(i, j) -> (i, j)>,
|
||||
affine_map<(i, j) -> (j)>
|
||||
]
|
||||
#redpar_vecmattrans_trait = {
|
||||
indexing_maps = #redpar_vecmattrans_accesses,
|
||||
iterator_types = ["reduction", "parallel"]
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @matvec2x2
|
||||
// CHECK-SAME: %[[A:.*0]]: memref<vector<2x2xf32>>
|
||||
// CHECK-SAME: %[[B:.*1]]: memref<vector<2xf32>>
|
||||
|
@ -172,3 +182,28 @@ func @vecmattrans2x2(%arg0: memref<vector<2x2xf32>>, %arg1: memref<vector<2xf32>
|
|||
memref.store %0, %arg2[] : memref<vector<2xf32>>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @redpar_vecmattrans2x2
|
||||
// CHECK-SAME: %[[A:.*0]]: memref<vector<2x2xf32>>
|
||||
// CHECK-SAME: %[[B:.*1]]: memref<vector<2xf32>>
|
||||
// CHECK-SAME: %[[C:.*2]]: memref<vector<2xf32>>
|
||||
// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref<vector<2x2xf32>>
|
||||
// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref<vector<2xf32>>
|
||||
// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref<vector<2xf32>>
|
||||
// CHECK: %[[T3:.*]] = vector.extract %[[T0]][0] : vector<2x2xf32>
|
||||
// CHECK: %[[T4:.*]] = vector.extract %[[T1]][0] : vector<2xf32>
|
||||
// CHECK: %[[T5:.*]] = vector.outerproduct %[[T3]], %[[T4]], %[[T2]] {kind = #vector.kind<add>} : vector<2xf32>, f32
|
||||
// CHECK: %[[T6:.*]] = vector.extract %[[T0]][1] : vector<2x2xf32>
|
||||
// CHECK: %[[T7:.*]] = vector.extract %[[T1]][1] : vector<2xf32>
|
||||
// CHECK: %[[T8:.*]] = vector.outerproduct %[[T6]], %[[T7]], %[[T5]] {kind = #vector.kind<add>} : vector<2xf32>, f32
|
||||
// CHECK: memref.store %[[T8]], %[[C]][] : memref<vector<2xf32>>
|
||||
// CHECK: return
|
||||
// Test input for the [reduction, parallel] vector-times-matrix contraction.
// Loads a 2x2 matrix from %arg0, a 2-element vector from %arg1 and a 2-element
// accumulator from %arg2, contracts the vector (lhs) with the matrix (rhs)
// into the accumulator using #redpar_vecmattrans_trait, and stores the result
// back into %arg2.
func @redpar_vecmattrans2x2(%arg0: memref<vector<2x2xf32>>, %arg1: memref<vector<2xf32>>,
|
||||
%arg2: memref<vector<2xf32>>) {
|
||||
%A = memref.load %arg0[] : memref<vector<2x2xf32>>
|
||||
%x = memref.load %arg1[] : memref<vector<2xf32>>
|
||||
%b = memref.load %arg2[] : memref<vector<2xf32>>
|
||||
%0 = vector.contract #redpar_vecmattrans_trait %x, %A, %b : vector<2xf32>, vector<2x2xf32> into vector<2xf32>
|
||||
memref.store %0, %arg2[] : memref<vector<2xf32>>
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue