[mlir][linalg] Lower PadTensorOps with non-constant pad value

The padding of such ops is not generated in a vectorized way. Instead,
emit a tensor::GenerateOp. We may vectorize GenerateOps in the future.

Differential Revision: https://reviews.llvm.org/D103879
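To illustrate the effect, here is a sketch based on the new test case added in this revision (IR fragments; %arg0, %c0, %c3, %c4, and %cst are as in the test, and the region body is illustrative):

  // Before: the pad value is computed from the index arguments, so it is
  // not a constant and the fill cannot use linalg.fill.
  %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] {
  ^bb0(%i: index, %j: index):
    %ii = index_cast %i : index to i32
    %fi = sitofp %ii : i32 to f32
    linalg.yield %fi : f32
  } : tensor<5x6xf32> to tensor<12x13xf32>

  // After: the padding is produced by an (unvectorized) tensor.generate
  // carrying the same region; the copy of the source is still vectorized.
  %fill = tensor.generate {
  ^bb0(%i: index, %j: index):
    %ii = index_cast %i : index to i32
    %fi = sitofp %ii : i32 to f32
    tensor.yield %fi : f32
  } : tensor<12x13xf32>
  %read = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]}
      : tensor<5x6xf32>, vector<5x6xf32>
  %1 = vector.transfer_write %read, %fill[%c3, %c4] {in_bounds = [true, true]}
      : vector<5x6xf32>, tensor<12x13xf32>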
@@ -689,10 +689,6 @@ struct GenericPadTensorOpVectorizationPattern
           padOp.getLoc(), getIntFromAttr(ofr.get<Attribute>())).getResult();
     };
 
-    // Pad value must be a constant.
-    auto padValue = padOp.getConstantPaddingValue();
-    if (!padValue) return failure();
-
     auto resultType = padOp.getResultType();
     // Compute size of InitTensorOp. Any combination of static/dynamic is
     // supported.
@@ -712,20 +708,20 @@ struct GenericPadTensorOpVectorizationPattern
       staticSizes.push_back(resultType.getDimSize(dim));
     }
 
     // Init tensor and fill it with padding.
     Value init = rewriter.create<InitTensorOp>(
         padOp.getLoc(), dynSizes, staticSizes, resultType.getElementType());
-    Value fill =
-        rewriter.create<FillOp>(padOp.getLoc(), init, padValue).result();
+    Value fill = tryVectorizeFill(rewriter, padOp, init, dynSizes);
 
-    auto sourceType = padOp.getSourceType();
     // Try vectorizing the copy of source.
-    if (tryVectorizeCopy(rewriter, padOp, padValue, fill).succeeded())
+    if (tryVectorizeCopy(rewriter, padOp, fill).succeeded())
       return success();
 
     // Neither source type nor PadTensorOp result type have static shape. Such
-    // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead.
+    // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead
+    // for copying the PadOp source.
 
+    auto sourceType = padOp.getSourceType();
     // Compute size of source of PadTensorOp.
     SmallVector<OpFoldResult> srcSizes;
     for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) {
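When the copy cannot be vectorized, the pattern falls back to a SubTensorInsertOp. A minimal sketch of that fallback output, shown with the 5x6 -> 12x13 test shapes for readability (in the truly dynamic case the offsets and sizes are SSA values computed by the pattern):

  // Insert the unpadded source into the filled/generated tensor at the
  // low-pad offsets; sizes match the source, strides are 1.
  %copied = subtensor_insert %arg0 into %fill[3, 4] [5, 6] [1, 1]
      : tensor<5x6xf32> into tensor<12x13xf32>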
@@ -745,14 +741,54 @@ struct GenericPadTensorOpVectorizationPattern
     return success();
   }
 
+  /// Vectorize the filling of `dest`. This is possible if the padOp is padding
+  /// with a constant value. Otherwise, generate a tensor::GenerateOp.
+  Value tryVectorizeFill(PatternRewriter &rewriter, PadTensorOp padOp,
+                         Value dest, const SmallVector<Value> &dynSizes) const {
+    // Fill can be vectorized if padValue is a constant. (If there is enough
+    // static type information, the FillOp will be vectorized by another
+    // pattern.)
+    auto padValue = padOp.getConstantPaddingValue();
+    if (padValue)
+      return rewriter.create<FillOp>(padOp.getLoc(), dest, padValue).result();
+
+    // Fill could not be vectorized: Lower to tensor::GenerateOp with region.
+    auto generateOp = rewriter.create<tensor::GenerateOp>(
+        padOp.getLoc(), padOp.getResultType(), dynSizes);
+    // Copy region to new op.
+    BlockAndValueMapping bvm;
+    padOp.region().cloneInto(&generateOp.getRegion(), bvm);
+    // Rewrite linalg::YieldOp to tensor::YieldOp.
+    OpBuilder::InsertionGuard guard(rewriter);
+    auto yieldOp = dyn_cast<linalg::YieldOp>(
+        generateOp.getRegion().front().getTerminator());
+    assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
+    assert(yieldOp.values().size() == 1);
+    rewriter.setInsertionPoint(yieldOp);
+    rewriter.replaceOpWithNewOp<tensor::YieldOp>(yieldOp, yieldOp.values()[0]);
+    return generateOp;
+  }
+
   /// Vectorize the copying of a PadTensorOp's source. This is possible if each
   /// dimension size is statically known in the source type or the result type
   /// (or both).
   LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, PadTensorOp padOp,
-                                 Value padValue, Value dest) const {
+                                 Value dest) const {
     auto sourceType = padOp.getSourceType();
     auto resultType = padOp.getResultType();
+
+    // Copy cannot be vectorized if pad value is non-constant and source shape
+    // is dynamic. In case of a dynamic source shape, padding must be appended
+    // by TransferReadOp, but TransferReadOp supports only constant padding.
+    auto padValue = padOp.getConstantPaddingValue();
+    if (!padValue) {
+      if (!sourceType.hasStaticShape()) return failure();
+      // Create dummy padding value.
+      auto elemType = sourceType.getElementType();
+      padValue = rewriter.create<ConstantOp>(padOp.getLoc(), elemType,
+                                             rewriter.getZeroAttr(elemType));
+    }
+
     SmallVector<int64_t> vecShape;
     SmallVector<bool> readInBounds;
     SmallVector<bool> writeInBounds;
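For reference, tensor.generate takes one operand per dynamic dimension of the result and a region that yields one scalar per element. A sketch of what tryVectorizeFill emits for a non-constant pad value with a dynamically sized result (%d0 and %d1 stand for the dynamic extents; the region body is illustrative, standing in for whatever the pad op's region computes, with linalg.yield rewritten to tensor.yield):

  %fill = tensor.generate %d0, %d1 {
  ^bb0(%i: index, %j: index):
    // Cloned from the linalg.pad_tensor region.
    %ii = index_cast %i : index to i32
    %f = sitofp %ii : i32 to f32
    tensor.yield %f : f32
  } : tensor<?x?xf32>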
@@ -674,6 +674,35 @@ func @pad_and_subtensor_insert(
 
 // -----
 
+// CHECK-LABEL: func @pad_tensor_non_const_pad_value
+//  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
+//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-DAG:   %[[C0:.*]] = constant 0 : index
+//   CHECK-DAG:   %[[C3:.*]] = constant 3 : index
+//   CHECK-DAG:   %[[C4:.*]] = constant 4 : index
+//       CHECK:   %[[FILL:.*]] = tensor.generate
+//       CHECK:     %[[RES:.*]] = mulf
+//       CHECK:     tensor.yield %[[RES]] : f32
+//       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : tensor<5x6xf32>, vector<5x6xf32>
+//       CHECK:   %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C3]], %[[C4]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<12x13xf32>
+//       CHECK:   return %[[WRITE]]
+func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
+  %c0 = constant 0 : index
+  %c5 = constant 5.0 : f32
+  %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      %i1 = index_cast %arg1 : index to i32
+      %i2 = index_cast %arg2 : index to i32
+      %f1 = sitofp %i1 : i32 to f32
+      %f2 = sitofp %i2 : i32 to f32
+      %m = mulf %f1, %f2 : f32
+      linalg.yield %m : f32
+  } : tensor<5x6xf32> to tensor<12x13xf32>
+  return %0 : tensor<12x13xf32>
+}
+
+// -----
+
 // CHECK-DAG: #[[$M0:.*]] = affine_map<(d0, d1) -> (d0, d1, 0)>
 
 // CHECK-LABEL: func @sum_exp
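One subtlety visible in the CHECK lines above: vector.transfer_read always requires a padding scalar, even though both in_bounds flags are true here and the padding is never actually read. That is what the "dummy padding value" in tryVectorizeCopy is for. A self-contained sketch mirroring the test's transfer ops (the function name is hypothetical):

  func @vectorized_copy(%src: tensor<5x6xf32>, %dest: tensor<12x13xf32>)
      -> tensor<12x13xf32> {
    %c0 = constant 0 : index
    %c3 = constant 3 : index
    %c4 = constant 4 : index
    // Dummy padding value; unused because the read is fully in-bounds.
    %cst = constant 0.0 : f32
    %read = vector.transfer_read %src[%c0, %c0], %cst {in_bounds = [true, true]}
        : tensor<5x6xf32>, vector<5x6xf32>
    %write = vector.transfer_write %read, %dest[%c3, %c4] {in_bounds = [true, true]}
        : vector<5x6xf32>, tensor<12x13xf32>
    return %write : tensor<12x13xf32>
  }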