forked from OSchip/llvm-project
[mlir] Linalg: Extend promotion to non f32 buffers.
Summary: Linalg's promotion pass only supported f32 buffers because of how the zero value was built for the `fill` operation. Moreover, `promoteSubViews` was returning a vector with one entry per float subview while omitting integer subviews. For a program with only integer subviews, the returned vector would be of size 0. However, `promoteSubViewOperands` would try to access a non-zero number of entries of this vector, resulting in a segfault. Reviewers: nicolasvasilache, ftynse Reviewed By: ftynse Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D74532
This commit is contained in:
parent
dad5f00e3b
commit
0acd7e02f2
|
@ -138,6 +138,8 @@ using StdIndexedValue =
|
|||
|
||||
using folded_std_constant_index = folded::ValueBuilder<ConstantIndexOp>;
|
||||
using folded_std_constant_float = folded::ValueBuilder<ConstantFloatOp>;
|
||||
using folded_std_constant_int = folded::ValueBuilder<ConstantIntOp>;
|
||||
using folded_std_constant = folded::ValueBuilder<ConstantOp>;
|
||||
using folded_std_dim = folded::ValueBuilder<DimOp>;
|
||||
using folded_std_muli = folded::ValueBuilder<MulIOp>;
|
||||
} // namespace intrinsics
|
||||
|
|
|
@ -121,10 +121,6 @@ mlir::linalg::promoteSubViews(OpBuilder &b, Location loc,
|
|||
DenseMap<Value, PromotionInfo> promotionInfoMap;
|
||||
for (auto v : subViews) {
|
||||
SubViewOp subView = cast<SubViewOp>(v.getDefiningOp());
|
||||
auto viewType = subView.getType();
|
||||
// TODO(ntv): support more cases than just float.
|
||||
if (!viewType.getElementType().isa<FloatType>())
|
||||
continue;
|
||||
auto promotionInfo =
|
||||
promoteFullTileBuffer(b, loc, subView, dynamicBuffers, folder);
|
||||
promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo));
|
||||
|
@ -136,10 +132,12 @@ mlir::linalg::promoteSubViews(OpBuilder &b, Location loc,
|
|||
auto info = promotionInfoMap.find(v);
|
||||
if (info == promotionInfoMap.end())
|
||||
continue;
|
||||
// TODO(ntv): value to fill with should be related to the operation.
|
||||
// For now, just use APFloat(0.0f).
|
||||
auto t = subView.getType().getElementType().cast<FloatType>();
|
||||
Value fillVal = folded_std_constant_float(folder, APFloat(0.0f), t);
|
||||
Value fillVal;
|
||||
if (auto t = subView.getType().getElementType().dyn_cast<FloatType>())
|
||||
fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0));
|
||||
else if (auto t =
|
||||
subView.getType().getElementType().dyn_cast<IntegerType>())
|
||||
fillVal = folded_std_constant_int(folder, 0, t);
|
||||
// TODO(ntv): fill is only necessary if `promotionInfo` has a full local
|
||||
// view that is different from the partial local view and we are on the
|
||||
// boundary.
|
||||
|
@ -214,13 +212,14 @@ static void promoteSubViews(FuncOp f, bool dynamicBuffers) {
|
|||
if (!op.hasBufferSemantics())
|
||||
return;
|
||||
|
||||
// TODO(ntv) some heuristic here to decide what to promote. Atm it is all or
|
||||
// nothing.
|
||||
// TODO(ntv) some heuristic here to decide what to promote. Atm only float
|
||||
// and integer buffers can be promoted.
|
||||
SetVector<Value> subViews;
|
||||
OpBuilder b(op);
|
||||
for (auto it : op.getInputsAndOutputBuffers())
|
||||
if (auto sv = dyn_cast_or_null<SubViewOp>(it.getDefiningOp()))
|
||||
subViews.insert(sv);
|
||||
if (sv.getType().getElementType().isIntOrFloat())
|
||||
subViews.insert(sv);
|
||||
if (!subViews.empty()) {
|
||||
promoteSubViewOperands(b, op, subViews, dynamicBuffers, &folder);
|
||||
toErase.push_back(op);
|
||||
|
|
|
@ -10,34 +10,32 @@
|
|||
// CHECK-DAG: #[[strided2DnoOffset:.*]] = affine_map<(d0, d1)[s0] -> (d0 * s0 + d1)>
|
||||
// CHECK-DAG: #[[strided2D_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
|
||||
|
||||
module {
|
||||
func @matmul(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
|
||||
%c4 = constant 4 : index
|
||||
%c3 = constant 3 : index
|
||||
%c2 = constant 2 : index
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%3 = view %A[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%4 = view %A[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%5 = view %A[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%6 = dim %3, 0 : memref<?x?xf32, #map0>
|
||||
%7 = dim %3, 1 : memref<?x?xf32, #map0>
|
||||
%8 = dim %4, 1 : memref<?x?xf32, #map0>
|
||||
loop.for %arg4 = %c0 to %6 step %c2 {
|
||||
loop.for %arg5 = %c0 to %8 step %c3 {
|
||||
loop.for %arg6 = %c0 to %7 step %c4 {
|
||||
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
linalg.matmul(%11, %14, %17) : memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
}
|
||||
func @matmul_f32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
|
||||
%c4 = constant 4 : index
|
||||
%c3 = constant 3 : index
|
||||
%c2 = constant 2 : index
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%3 = view %A[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%4 = view %A[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%5 = view %A[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, #map0>
|
||||
%6 = dim %3, 0 : memref<?x?xf32, #map0>
|
||||
%7 = dim %3, 1 : memref<?x?xf32, #map0>
|
||||
%8 = dim %4, 1 : memref<?x?xf32, #map0>
|
||||
loop.for %arg4 = %c0 to %6 step %c2 {
|
||||
loop.for %arg5 = %c0 to %8 step %c3 {
|
||||
loop.for %arg6 = %c0 to %7 step %c4 {
|
||||
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
linalg.matmul(%11, %14, %17) : memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
// CHECK-LABEL: func @matmul_f32(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
|
@ -74,3 +72,133 @@ module {
|
|||
// CHECK: dealloc %[[tmpA]] : memref<32xi8>
|
||||
// CHECK: dealloc %[[tmpB]] : memref<48xi8>
|
||||
// CHECK: dealloc %[[tmpC]] : memref<24xi8>
|
||||
|
||||
// -----
|
||||
|
||||
func @matmul_f64(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
|
||||
%c4 = constant 4 : index
|
||||
%c3 = constant 3 : index
|
||||
%c2 = constant 2 : index
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%3 = view %A[%c0][%M, %K] : memref<?xi8> to memref<?x?xf64, #map0>
|
||||
%4 = view %A[%c0][%K, %N] : memref<?xi8> to memref<?x?xf64, #map0>
|
||||
%5 = view %A[%c0][%M, %N] : memref<?xi8> to memref<?x?xf64, #map0>
|
||||
%6 = dim %3, 0 : memref<?x?xf64, #map0>
|
||||
%7 = dim %3, 1 : memref<?x?xf64, #map0>
|
||||
%8 = dim %4, 1 : memref<?x?xf64, #map0>
|
||||
loop.for %arg4 = %c0 to %6 step %c2 {
|
||||
loop.for %arg5 = %c0 to %8 step %c3 {
|
||||
loop.for %arg6 = %c0 to %7 step %c4 {
|
||||
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][%c1, %c1] : memref<?x?xf64, #map0> to memref<?x?xf64, offset: ?, strides: [?, ?]>
|
||||
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][%c1, %c1] : memref<?x?xf64, #map0> to memref<?x?xf64, offset: ?, strides: [?, ?]>
|
||||
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref<?x?xf64, #map0> to memref<?x?xf64, offset: ?, strides: [?, ?]>
|
||||
linalg.matmul(%11, %14, %17) : memref<?x?xf64, offset: ?, strides: [?, ?]>, memref<?x?xf64, offset: ?, strides: [?, ?]>, memref<?x?xf64, offset: ?, strides: [?, ?]>
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @matmul_f64(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: %[[vA_f64:.*]] = std.subview {{.*}} : memref<?x?xf64, #[[strided2D]]>
|
||||
// CHECK: %[[vB_f64:.*]] = std.subview {{.*}} : memref<?x?xf64, #[[strided2D]]>
|
||||
// CHECK: %[[vC_f64:.*]] = std.subview {{.*}} : memref<?x?xf64, #[[strided2D]]>
|
||||
///
|
||||
// CHECK: %[[tmpA_f64:.*]] = alloc() : memref<64xi8>
|
||||
// CHECK: %[[fullA_f64:.*]] = std.view %[[tmpA_f64]][][{{.*}}] : memref<64xi8> to memref<?x?xf64>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xf64>
|
||||
// CHECK: %[[partialA_f64:.*]] = linalg.slice %[[fullA_f64]][%{{.*}}, %{{.*}}] : memref<?x?xf64>, !linalg.range, !linalg.range, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
///
|
||||
// CHECK: %[[tmpB_f64:.*]] = alloc() : memref<96xi8>
|
||||
// CHECK: %[[fullB_f64:.*]] = std.view %[[tmpB_f64]][][{{.*}}] : memref<96xi8> to memref<?x?xf64>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xf64>
|
||||
// CHECK: %[[partialB_f64:.*]] = linalg.slice %[[fullB_f64]][%{{.*}}, %{{.*}}] : memref<?x?xf64>, !linalg.range, !linalg.range, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
///
|
||||
// CHECK: %[[tmpC_f64:.*]] = alloc() : memref<48xi8>
|
||||
// CHECK: %[[fullC_f64:.*]] = std.view %[[tmpC_f64]][][{{.*}}] : memref<48xi8> to memref<?x?xf64>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xf64>
|
||||
// CHECK: %[[partialC_f64:.*]] = linalg.slice %[[fullC_f64]][%{{.*}}, %{{.*}}] : memref<?x?xf64>, !linalg.range, !linalg.range, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
|
||||
// CHECK: linalg.fill(%[[fullA_f64]], {{.*}}) : memref<?x?xf64>, f64
|
||||
// CHECK: linalg.fill(%[[fullB_f64]], {{.*}}) : memref<?x?xf64>, f64
|
||||
// CHECK: linalg.fill(%[[fullC_f64]], {{.*}}) : memref<?x?xf64>, f64
|
||||
// CHECK: linalg.copy(%[[vA_f64]], %[[partialA_f64]]) : memref<?x?xf64, #[[strided2D_dynamic]]>, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
// CHECK: linalg.copy(%[[vB_f64]], %[[partialB_f64]]) : memref<?x?xf64, #[[strided2D_dynamic]]>, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
// CHECK: linalg.copy(%[[vC_f64]], %[[partialC_f64]]) : memref<?x?xf64, #[[strided2D_dynamic]]>, memref<?x?xf64, #[[strided2DnoOffset]]>
|
||||
//
|
||||
// CHECK: linalg.matmul(%[[fullA_f64]], %[[fullB_f64]], %[[fullC_f64]]) : memref<?x?xf64>, memref<?x?xf64>, memref<?x?xf64>
|
||||
//
|
||||
// CHECK: linalg.copy(%[[partialC_f64]], %[[vC_f64]]) : memref<?x?xf64, #[[strided2DnoOffset]]>, memref<?x?xf64, #[[strided2D_dynamic]]>
|
||||
//
|
||||
// CHECK: dealloc %[[tmpA_f64]] : memref<64xi8>
|
||||
// CHECK: dealloc %[[tmpB_f64]] : memref<96xi8>
|
||||
// CHECK: dealloc %[[tmpC_f64]] : memref<48xi8>
|
||||
|
||||
// -----
|
||||
|
||||
func @matmul_i32(%A: memref<?xi8>, %M: index, %N: index, %K: index) {
|
||||
%c4 = constant 4 : index
|
||||
%c3 = constant 3 : index
|
||||
%c2 = constant 2 : index
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%3 = view %A[%c0][%M, %K] : memref<?xi8> to memref<?x?xi32, #map0>
|
||||
%4 = view %A[%c0][%K, %N] : memref<?xi8> to memref<?x?xi32, #map0>
|
||||
%5 = view %A[%c0][%M, %N] : memref<?xi8> to memref<?x?xi32, #map0>
|
||||
%6 = dim %3, 0 : memref<?x?xi32, #map0>
|
||||
%7 = dim %3, 1 : memref<?x?xi32, #map0>
|
||||
%8 = dim %4, 1 : memref<?x?xi32, #map0>
|
||||
loop.for %arg4 = %c0 to %6 step %c2 {
|
||||
loop.for %arg5 = %c0 to %8 step %c3 {
|
||||
loop.for %arg6 = %c0 to %7 step %c4 {
|
||||
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][%c1, %c1] : memref<?x?xi32, #map0> to memref<?x?xi32, offset: ?, strides: [?, ?]>
|
||||
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][%c1, %c1] : memref<?x?xi32, #map0> to memref<?x?xi32, offset: ?, strides: [?, ?]>
|
||||
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref<?x?xi32, #map0> to memref<?x?xi32, offset: ?, strides: [?, ?]>
|
||||
linalg.matmul(%11, %14, %17) : memref<?x?xi32, offset: ?, strides: [?, ?]>, memref<?x?xi32, offset: ?, strides: [?, ?]>, memref<?x?xi32, offset: ?, strides: [?, ?]>
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @matmul_i32(%{{.*}}: memref<?xi8>, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
|
||||
// CHECK: %[[vA_i32:.*]] = std.subview {{.*}} : memref<?x?xi32, #[[strided2D]]>
|
||||
// CHECK: %[[vB_i32:.*]] = std.subview {{.*}} : memref<?x?xi32, #[[strided2D]]>
|
||||
// CHECK: %[[vC_i32:.*]] = std.subview {{.*}} : memref<?x?xi32, #[[strided2D]]>
|
||||
///
|
||||
// CHECK: %[[tmpA_i32:.*]] = alloc() : memref<32xi8>
|
||||
// CHECK: %[[fullA_i32:.*]] = std.view %[[tmpA_i32]][][{{.*}}] : memref<32xi8> to memref<?x?xi32>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xi32>
|
||||
// CHECK: %[[partialA_i32:.*]] = linalg.slice %[[fullA_i32]][%{{.*}}, %{{.*}}] : memref<?x?xi32>, !linalg.range, !linalg.range, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
///
|
||||
// CHECK: %[[tmpB_i32:.*]] = alloc() : memref<48xi8>
|
||||
// CHECK: %[[fullB_i32:.*]] = std.view %[[tmpB_i32]][][{{.*}}] : memref<48xi8> to memref<?x?xi32>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xi32>
|
||||
// CHECK: %[[partialB_i32:.*]] = linalg.slice %[[fullB_i32]][%{{.*}}, %{{.*}}] : memref<?x?xi32>, !linalg.range, !linalg.range, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
///
|
||||
// CHECK: %[[tmpC_i32:.*]] = alloc() : memref<24xi8>
|
||||
// CHECK: %[[fullC_i32:.*]] = std.view %[[tmpC_i32]][][{{.*}}] : memref<24xi8> to memref<?x?xi32>
|
||||
// DYNAMIC: std.view %{{.*}}[][{{.*}}] : memref<?xi8> to memref<?x?xi32>
|
||||
// CHECK: %[[partialC_i32:.*]] = linalg.slice %[[fullC_i32]][%{{.*}}, %{{.*}}] : memref<?x?xi32>, !linalg.range, !linalg.range, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
|
||||
// CHECK: linalg.fill(%[[fullA_i32]], {{.*}}) : memref<?x?xi32>, i32
|
||||
// CHECK: linalg.fill(%[[fullB_i32]], {{.*}}) : memref<?x?xi32>, i32
|
||||
// CHECK: linalg.fill(%[[fullC_i32]], {{.*}}) : memref<?x?xi32>, i32
|
||||
// CHECK: linalg.copy(%[[vA_i32]], %[[partialA_i32]]) : memref<?x?xi32, #[[strided2D_dynamic]]>, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
// CHECK: linalg.copy(%[[vB_i32]], %[[partialB_i32]]) : memref<?x?xi32, #[[strided2D_dynamic]]>, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
// CHECK: linalg.copy(%[[vC_i32]], %[[partialC_i32]]) : memref<?x?xi32, #[[strided2D_dynamic]]>, memref<?x?xi32, #[[strided2DnoOffset]]>
|
||||
//
|
||||
// CHECK: linalg.matmul(%[[fullA_i32]], %[[fullB_i32]], %[[fullC_i32]]) : memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32>
|
||||
//
|
||||
// CHECK: linalg.copy(%[[partialC_i32]], %[[vC_i32]]) : memref<?x?xi32, #[[strided2DnoOffset]]>, memref<?x?xi32, #[[strided2D_dynamic]]>
|
||||
//
|
||||
// CHECK: dealloc %[[tmpA_i32]] : memref<32xi8>
|
||||
// CHECK: dealloc %[[tmpB_i32]] : memref<48xi8>
|
||||
// CHECK: dealloc %[[tmpC_i32]] : memref<24xi8>
|
||||
|
|
Loading…
Reference in New Issue