[mlir][linalg] Vectorize linalg.pad_tensor source copying (improved)

Vectorize the copying of a linalg.pad_tensor op's source if, for each dimension, the size is statically known in the source shape or in the result shape (or both).

Differential Revision: https://reviews.llvm.org/D103791
parent 4c2f3d810b
commit 01e3b34469
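For illustration, the rewrite now handled by this pattern looks as follows (a sketch adapted from the pad_static test updated below; the value names and explicit constants are chosen for readability, not verbatim pattern output):

  %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
  ^bb0(%i: index, %j: index, %k: index):
    linalg.yield %pad_value : f32
  } : tensor<2x?x2xf32> to tensor<2x3x4xf32>

is vectorized into, roughly:

  %c0 = constant 0 : index
  %c2 = constant 2 : index
  %init = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
  %vec = vector.broadcast %pad_value : f32 to vector<2x3x4xf32>
  %fill = vector.transfer_write %vec, %init[%c0, %c0, %c0]
      : vector<2x3x4xf32>, tensor<2x3x4xf32>
  %read = vector.transfer_read %arg0[%c0, %c0, %c0], %pad_value
      {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
  %result = vector.transfer_write %read, %fill[%c0, %c0, %c2]
      {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>

The source is read with %pad_value as padding (the read along the dynamic dimension may run out-of-bounds), and the result vector is written into the filled tensor at the low-pad offset.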
@@ -719,15 +719,9 @@ struct GenericPadTensorOpVectorizationPattern

     auto sourceType = padOp.getSourceType();

-    // Copy of source with static shape can be vectorized.
-    if (sourceType.hasStaticShape()) {
-      auto vecType = VectorType::get(sourceType.getShape(),
-                                     sourceType.getElementType());
-      vectorizeStaticShapeSource(rewriter, padOp, fill, vecType);
+    // Try vectorizing the copy of source.
+    if (tryVectorizeCopy(rewriter, padOp, padValue, fill).succeeded())
       return success();
-    }
-
-    // TODO: Vectorize dynamic source but static destination.

     // Neither source type nor PadTensorOp result type have static shape. Such
     // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead.
@@ -751,23 +745,57 @@ struct GenericPadTensorOpVectorizationPattern
     return success();
   }

-  /// Vectorize the copying of a PadTensorOp's source that has static shape.
-  void vectorizeStaticShapeSource(PatternRewriter &rewriter, PadTensorOp padOp,
-                                  Value dest, VectorType vecType) const {
+  /// Vectorize the copying of a PadTensorOp's source. This is possible if each
+  /// dimension size is statically known in the source type or the result type
+  /// (or both).
+  LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, PadTensorOp padOp,
+                                 Value padValue, Value dest) const {
+    auto sourceType = padOp.getSourceType();
+    auto resultType = padOp.getResultType();
+
+    SmallVector<int64_t> vecShape;
+    SmallVector<bool> readInBounds;
+    SmallVector<bool> writeInBounds;
+    for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+      if (!sourceType.isDynamicDim(i)) {
+        vecShape.push_back(sourceType.getDimSize(i));
+        // Source shape is statically known: Neither read nor write are out-of-
+        // bounds.
+        readInBounds.push_back(true);
+        writeInBounds.push_back(true);
+      } else if (!resultType.isDynamicDim(i)) {
+        // Source shape is not statically known, but result shape is. Vectorize
+        // with size of result shape. This may be larger than the source size.
+        vecShape.push_back(resultType.getDimSize(i));
+        // Read may be out-of-bounds because the result size could be larger
+        // than the source size.
+        readInBounds.push_back(false);
+        // Write is out-of-bounds if low padding > 0.
+        writeInBounds.push_back(
+            isEqualConstantIntOrValue(padOp.getMixedLowPad()[i],
+                                      rewriter.getIndexAttr(0)));
+      } else {
+        // Neither source nor result dim of padOp is static. Cannot vectorize
+        // the copy.
+        return failure();
+      }
+    }
+    auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
     // Generate TransferReadOp.
     SmallVector<Value> readIndices(
         vecType.getRank(), rewriter.create<ConstantIndexOp>(padOp.getLoc(), 0));
     auto read = rewriter.create<vector::TransferReadOp>(
-        padOp.getLoc(), vecType, padOp.source(), readIndices);
+        padOp.getLoc(), vecType, padOp.source(), readIndices, padValue,
+        readInBounds);

-    // Generate TransferWriteOp. The destination dimensions may be dynamic, but
-    // the write cannot be out-of-bounds. (A large enough destination tensor is
-    // allocated in this pattern.)
+    // Generate TransferWriteOp.
     auto writeIndices = ofrToIndexValues(
         rewriter, padOp.getLoc(), padOp.getMixedLowPad());
-    SmallVector<bool> inBounds(vecType.getRank(), true);
     rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        padOp, read, dest, writeIndices, inBounds);
+        padOp, read, dest, writeIndices, writeInBounds);
+
+    return success();
   }
 };
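To make the in-bounds bookkeeping concrete, here is the per-dimension reasoning for the pad_static test updated below (source tensor<2x?x2xf32>, result tensor<2x3x4xf32>, low pad [0, 0, 2]):

  // dim 0: source size 2 is static      -> vecShape[0] = 2; read and write
  //                                        are in-bounds.
  // dim 1: source size is dynamic,      -> vecShape[1] = 3; read may be
  //        result size 3 is static         out-of-bounds (padded with the
  //                                        pad value); write is in-bounds
  //                                        because the low pad is 0.
  // dim 2: source size 2 is static      -> vecShape[2] = 2; read and write
  //                                        are in-bounds.
  //
  // Result: vecType = vector<2x3x2xf32>,
  //         readInBounds  = [true, false, true],
  //         writeInBounds = [true, true, true].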
@@ -515,12 +515,13 @@ func @matmul_i8_i8_i32(%a: memref<4x6xi8>, %b: memref<6x12xi8>, %c: memref<4x12x
 // CHECK-LABEL: func @pad_static(
 // CHECK-SAME:  %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
 // CHECK-NOT:   linalg.pad_tensor
-// CHECK-DAG:   %[[C1:.*]] = constant 1 : index
+// CHECK-DAG:   %[[C0:.*]] = constant 0 : index
+// CHECK-DAG:   %[[C2:.*]] = constant 2 : index
 // CHECK-DAG:   %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
 // CHECK-DAG:   %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
 // CHECK:       %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
-// CHECK-DAG:   %[[DIM1:.*]] = memref.dim %[[ARG0]], %[[C1]]
-// CHECK:       %[[RESULT:.*]] = subtensor_insert %[[ARG0]] into %2[0, 0, 2] [2, %[[DIM1]], 2] [1, 1, 1] : tensor<2x?x2xf32> into tensor<2x3x4xf32>
+// CHECK:       %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
+// CHECK:       %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
 // CHECK:       return %[[RESULT]]
 func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
   %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {