forked from OSchip/llvm-project
Fix CombineContractBroadcast folding reduction iterators.
Fix CombineContractBroadcast folding reduction iterators. Differential Revision: https://reviews.llvm.org/D128739
This commit is contained in:
parent
079c8ccb33
commit
694ad3eaef
|
@ -32,6 +32,7 @@
|
||||||
#include "llvm/ADT/DenseSet.h"
|
#include "llvm/ADT/DenseSet.h"
|
||||||
#include "llvm/ADT/MapVector.h"
|
#include "llvm/ADT/MapVector.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/ADT/SmallBitVector.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
@ -1126,6 +1127,21 @@ struct CombineContractBroadcast
|
||||||
// relaxed we can remove this case.
|
// relaxed we can remove this case.
|
||||||
if (innerDimBroadcast)
|
if (innerDimBroadcast)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
// It would be incorrect to fold a broadcast onto a reduction dimension
|
||||||
|
// of non-unit size.
|
||||||
|
bool nonUnitDimReductionBroadcast = false;
|
||||||
|
for (int64_t i = 0; i < rankDiff; ++i) {
|
||||||
|
if (broadcast.getVectorType().getDimSize(i) != 1 &&
|
||||||
|
isReductionIterator(contractOp.getIteratorTypes()
|
||||||
|
.getValue()[map.getDimPosition(i)])) {
|
||||||
|
nonUnitDimReductionBroadcast = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nonUnitDimReductionBroadcast)
|
||||||
|
continue;
|
||||||
|
|
||||||
AffineMap broadcastMap =
|
AffineMap broadcastMap =
|
||||||
AffineMap::get(broadcast.getVectorType().getRank(), 0, originalDims,
|
AffineMap::get(broadcast.getVectorType().getRank(), 0, originalDims,
|
||||||
contractOp.getContext());
|
contractOp.getContext());
|
||||||
|
@ -1133,11 +1149,40 @@ struct CombineContractBroadcast
|
||||||
*operand = broadcast.getSource();
|
*operand = broadcast.getSource();
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!changed)
|
if (!changed)
|
||||||
return failure();
|
return failure();
|
||||||
|
|
||||||
|
// Determine which dims are usused, now that the maps have been composed
|
||||||
|
// with the broadcast maps.
|
||||||
|
unsigned numDims = maps[0].getNumDims();
|
||||||
|
llvm::SmallBitVector unusedDims(numDims, true);
|
||||||
|
for (const auto &m : maps) {
|
||||||
|
for (unsigned i = 0; i < numDims; ++i) {
|
||||||
|
if (m.isFunctionOfDim(i))
|
||||||
|
unusedDims.reset(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Compress unused dims.
|
||||||
|
for (auto &m : maps)
|
||||||
|
m = compressDims(m, unusedDims);
|
||||||
|
// Compute the combined iterators.
|
||||||
|
SmallVector<Attribute, 4> iterators;
|
||||||
|
for (unsigned i = 0; i < numDims; ++i) {
|
||||||
|
if (!unusedDims.test(i))
|
||||||
|
iterators.push_back(contractOp.getIteratorTypes().getValue()[i]);
|
||||||
|
}
|
||||||
|
// Check that compressing unused dims isn't removing all reduction
|
||||||
|
// iterators. For example, if the vector.contract had only one reduction
|
||||||
|
// iterator and that was a unit-dimension created by a broadcast,
|
||||||
|
// then we should bail here, otherwise we would create a contract without
|
||||||
|
// a reduction iterator.
|
||||||
|
if (!llvm::any_of(iterators, isReductionIterator))
|
||||||
|
return failure();
|
||||||
|
|
||||||
rewriter.replaceOpWithNewOp<vector::ContractionOp>(
|
rewriter.replaceOpWithNewOp<vector::ContractionOp>(
|
||||||
contractOp, lhs, rhs, contractOp.getAcc(),
|
contractOp, lhs, rhs, contractOp.getAcc(),
|
||||||
rewriter.getAffineMapArrayAttr(maps), contractOp.getIteratorTypes());
|
rewriter.getAffineMapArrayAttr(maps), rewriter.getArrayAttr(iterators));
|
||||||
return success();
|
return success();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -86,6 +86,98 @@ func.func @contract_broadcast(
|
||||||
return %1 : vector<8x32xf32>
|
return %1 : vector<8x32xf32>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
// Test that CombineContractBroadcast is able to combine a broadcast that
|
||||||
|
// creates a unit dim that is consumed by a reduction iterator, dropping that
|
||||||
|
// reduction iterator, as long as there is another reduction iterator left.
|
||||||
|
|
||||||
|
#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
|
||||||
|
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
|
||||||
|
#map2 = affine_map<(d0, d1, d2, d3) -> (d1, d2)>
|
||||||
|
|
||||||
|
// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
|
||||||
|
// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)>
|
||||||
|
// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||||
|
|
||||||
|
// CHECK-LABEL: contract_broadcast_unit_dim_reduction
|
||||||
|
// CHECK-SAME: (%[[ARG0:.+]]: vector<8x4xi32>, %[[ARG1:.+]]: vector<8x4xi32>, %[[ARG2:.+]]: vector<8x8xi32>)
|
||||||
|
// CHECK: vector.contract
|
||||||
|
// CHECK-SAME: indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]]
|
||||||
|
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
|
||||||
|
// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<8x4xi32>, vector<8x4xi32> into vector<8x8xi32>
|
||||||
|
func.func @contract_broadcast_unit_dim_reduction(%arg0 : vector<8x4xi32>, %arg1 : vector<8x4xi32>, %arg2 : vector<8x8xi32>) -> vector<8x8xi32> {
|
||||||
|
%0 = vector.broadcast %arg0 : vector<8x4xi32> to vector<1x8x4xi32>
|
||||||
|
%1 = vector.broadcast %arg1 : vector<8x4xi32> to vector<1x8x4xi32>
|
||||||
|
%result = vector.contract {
|
||||||
|
indexing_maps = [#map0, #map1, #map2],
|
||||||
|
iterator_types = ["reduction", "parallel", "parallel", "reduction"],
|
||||||
|
kind = #vector.kind<add>
|
||||||
|
} %0, %1, %arg2 : vector<1x8x4xi32>, vector<1x8x4xi32> into vector<8x8xi32>
|
||||||
|
return %result : vector<8x8xi32>
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
// Test that CombineContractBroadcast will not combine a broadcast that creates
|
||||||
|
// a non-unit dim that is consumed by a reduction iterator.
|
||||||
|
// Moreover, the affine_map's are permuting the position of that reduction
|
||||||
|
// iterator with the position of a parallel iterator to ensure that
|
||||||
|
// the logic guarding that case does not mix up dimensions here.
|
||||||
|
|
||||||
|
#map0 = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
|
||||||
|
#map1 = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>
|
||||||
|
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d2)>
|
||||||
|
|
||||||
|
// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
|
||||||
|
// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>
|
||||||
|
// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d2)>
|
||||||
|
|
||||||
|
// CHECK-LABEL: contract_broadcast_non_unit_dim_reduction_with_permutation
|
||||||
|
// CHECK-SAME: (%[[ARG0:.+]]: vector<8x4xi32>, %[[ARG1:.+]]: vector<8x4xi32>, %[[ARG2:.+]]: vector<8x8xi32>)
|
||||||
|
// CHECK: %[[BROADCAST0:.+]] = vector.broadcast %[[ARG0]] : vector<8x4xi32> to vector<2x8x4xi32>
|
||||||
|
// CHECK: %[[BROADCAST1:.+]] = vector.broadcast %[[ARG1]] : vector<8x4xi32> to vector<2x8x4xi32>
|
||||||
|
// CHECK: vector.contract
|
||||||
|
// CHECK-SAME: indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]]
|
||||||
|
// CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"]
|
||||||
|
// CHECK-SAME: %[[BROADCAST0]], %[[BROADCAST1]], %[[ARG2]] : vector<2x8x4xi32>, vector<2x8x4xi32> into vector<8x8xi32>
|
||||||
|
func.func @contract_broadcast_non_unit_dim_reduction_with_permutation(%arg0 : vector<8x4xi32>, %arg1 : vector<8x4xi32>, %arg2 : vector<8x8xi32>) -> vector<8x8xi32> {
|
||||||
|
%0 = vector.broadcast %arg0 : vector<8x4xi32> to vector<2x8x4xi32>
|
||||||
|
%1 = vector.broadcast %arg1 : vector<8x4xi32> to vector<2x8x4xi32>
|
||||||
|
%result = vector.contract {
|
||||||
|
indexing_maps = [#map0, #map1, #map2],
|
||||||
|
iterator_types = ["parallel", "reduction", "parallel", "reduction"],
|
||||||
|
kind = #vector.kind<add>
|
||||||
|
} %0, %1, %arg2 : vector<2x8x4xi32>, vector<2x8x4xi32> into vector<8x8xi32>
|
||||||
|
return %result : vector<8x8xi32>
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
// Test that CombineContractBroadcast is not combining this case, as that would
|
||||||
|
// result in dropping this contract's only reduction iterator.
|
||||||
|
|
||||||
|
#map0 = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||||
|
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
|
||||||
|
#map2 = affine_map<(d0, d1, d2) -> (d1, d2)>
|
||||||
|
|
||||||
|
// CHECK-LABEL: contract_broadcast_unit_dim_reduction_as_only_reduction
|
||||||
|
// CHECK-SAME: (%[[ARG0:.+]]: vector<8xi32>, %[[ARG1:.+]]: vector<8xi32>, %[[ARG2:.+]]: vector<8x8xi32>)
|
||||||
|
// CHECK: %[[BROADCAST0:.+]] = vector.broadcast %[[ARG0]] : vector<8xi32> to vector<1x8xi32>
|
||||||
|
// CHECK: %[[BROADCAST1:.+]] = vector.broadcast %[[ARG1]] : vector<8xi32> to vector<1x8xi32>
|
||||||
|
// CHECK: vector.contract
|
||||||
|
// CHECK-SAME: indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]]
|
||||||
|
// CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel"]
|
||||||
|
// CHECK-SAME: %[[BROADCAST0]], %[[BROADCAST1]], %[[ARG2]] : vector<1x8xi32>, vector<1x8xi32> into vector<8x8xi32>
|
||||||
|
func.func @contract_broadcast_unit_dim_reduction_as_only_reduction(%arg0 : vector<8xi32>, %arg1 : vector<8xi32>, %arg2 : vector<8x8xi32>) -> vector<8x8xi32> {
|
||||||
|
%0 = vector.broadcast %arg0 : vector<8xi32> to vector<1x8xi32>
|
||||||
|
%1 = vector.broadcast %arg1 : vector<8xi32> to vector<1x8xi32>
|
||||||
|
%result = vector.contract {
|
||||||
|
indexing_maps = [#map0, #map1, #map2],
|
||||||
|
iterator_types = ["reduction", "parallel", "parallel"],
|
||||||
|
kind = #vector.kind<add>
|
||||||
|
} %0, %1, %arg2 : vector<1x8xi32>, vector<1x8xi32> into vector<8x8xi32>
|
||||||
|
return %result : vector<8x8xi32>
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Reorder casting ops and vector ops. The casting ops have almost identical
|
// Reorder casting ops and vector ops. The casting ops have almost identical
|
||||||
// pattern, so only arith.extsi op is tested.
|
// pattern, so only arith.extsi op is tested.
|
||||||
|
|
Loading…
Reference in New Issue