forked from OSchip/llvm-project
[NFC] Refactor creation of the BLIS micro-kernel and improve documentation
Reviewed-by: Tobias Grosser <tobias@grosser.es> llvm-svn: 276616
This commit is contained in:
parent
ce415ff9c5
commit
2cb4d133f5
|
@ -20,6 +20,16 @@ struct isl_schedule;
|
|||
struct isl_schedule_node;
|
||||
struct isl_union_map;
|
||||
|
||||
/// @brief Parameters of the micro kernel.
|
||||
///
|
||||
/// Parameters, which determine sizes of rank-1 (i.e., outer product) update
|
||||
/// used in the optimized matrix multiplication.
|
||||
///
|
||||
struct MicroKernelParamsTy {
|
||||
int Mr;
|
||||
int Nr;
|
||||
};
|
||||
|
||||
namespace polly {
|
||||
extern bool DisablePollyTiling;
|
||||
class Scop;
|
||||
|
@ -232,6 +242,21 @@ private:
|
|||
///
|
||||
/// @param Node The node to check.
|
||||
static bool isMatrMultPattern(__isl_keep isl_schedule_node *Node);
|
||||
|
||||
/// @brief Create the BLIS micro-kernel.
|
||||
///
|
||||
/// We create the BLIS micro-kernel by applying a combination of tiling
|
||||
/// of dimensions of the band node and interchanging of two innermost
|
||||
/// modified dimensions. The values passed in MicroKernelParams are used
|
||||
/// as tile sizes.
|
||||
///
|
||||
/// @param Node The schedule node to be modified.
|
||||
/// @param MicroKernelParams Parameters of the micro kernel
|
||||
/// to be used as tile sizes.
|
||||
/// @see MicroKernelParamsTy
|
||||
static __isl_give isl_schedule_node *
|
||||
createMicroKernel(__isl_take isl_schedule_node *Node,
|
||||
MicroKernelParamsTy MicroKernelParams);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -493,10 +493,27 @@ static __isl_give isl_map *circularShiftOutputDims(__isl_take isl_map *IslMap) {
|
|||
return isl_map_set_tuple_id(IslMap, isl_dim_in, InputDimsId);
|
||||
}
|
||||
|
||||
__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
|
||||
__isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
|
||||
__isl_give isl_schedule_node *ScheduleTreeOptimizer::createMicroKernel(
|
||||
__isl_take isl_schedule_node *Node, MicroKernelParamsTy MicroKernelParams) {
|
||||
return applyRegisterTiling(Node, {MicroKernelParams.Mr, MicroKernelParams.Nr},
|
||||
1);
|
||||
}
|
||||
|
||||
/// Get parameters of the BLIS micro kernel.
|
||||
///
|
||||
/// We choose the Mr and Nr parameters of the micro kernel to be large enough
|
||||
/// such that no stalls caused by the combination of latencies and dependencies
|
||||
/// are introduced during the updates of the resulting matrix of the matrix
|
||||
/// multiplication. However, they should also be as small as possible to
|
||||
/// release more registers for entries of multiplied matrices.
|
||||
///
|
||||
/// @param TTI Target Transform Info.
|
||||
/// @return The structure of type MicroKernelParamsTy.
|
||||
/// @see MicroKernelParamsTy
|
||||
static struct MicroKernelParamsTy
|
||||
getMicroKernelParams(const llvm::TargetTransformInfo *TTI) {
|
||||
assert(TTI && "The target transform info should be provided.");
|
||||
// Get a micro-kernel.
|
||||
|
||||
// Nvec - Number of double-precision floating-point numbers that can be held
|
||||
// by a vector register. Use 2 by default.
|
||||
auto Nvec = TTI->getRegisterBitWidth(true) / 64;
|
||||
|
@ -505,8 +522,14 @@ __isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
|
|||
int Nr =
|
||||
ceil(sqrt(Nvec * LatencyVectorFma * ThrougputVectorFma) / Nvec) * Nvec;
|
||||
int Mr = ceil(Nvec * LatencyVectorFma * ThrougputVectorFma / Nr);
|
||||
std::vector<int> MicroKernelParams{Mr, Nr};
|
||||
Node = applyRegisterTiling(Node, MicroKernelParams, 1);
|
||||
return {Mr, Nr};
|
||||
}
|
||||
|
||||
__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
|
||||
__isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
|
||||
assert(TTI && "The target transform info should be provided.");
|
||||
auto MicroKernelParams = getMicroKernelParams(TTI);
|
||||
Node = createMicroKernel(Node, MicroKernelParams);
|
||||
return Node;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue