//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the experimental intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/CodeGen/ExpandReductions.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
#include "llvm/CodeGen/Passes.h"
|
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/IRBuilder.h"
|
|
|
|
#include "llvm/IR/InstIterator.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
#include "llvm/IR/Module.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
#include "llvm/Pass.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// Returns the instruction opcode associated with the reduction intrinsic
/// \p ID: the matching arithmetic or bitwise opcode, or ICmp / FCmp for the
/// integer / floating-point min and max reductions. Any other ID is treated
/// as unreachable.
unsigned getOpcode(Intrinsic::ID ID) {
  switch (ID) {
  // Integer min/max reductions map to the integer comparison opcode.
  case Intrinsic::experimental_vector_reduce_smax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_umin:
    return Instruction::ICmp;

  // Floating-point min/max reductions map to the FP comparison opcode.
  case Intrinsic::experimental_vector_reduce_fmax:
  case Intrinsic::experimental_vector_reduce_fmin:
    return Instruction::FCmp;

  // Floating-point arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_v2_fadd:
    return Instruction::FAdd;
  case Intrinsic::experimental_vector_reduce_v2_fmul:
    return Instruction::FMul;

  // Integer arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_add:
    return Instruction::Add;
  case Intrinsic::experimental_vector_reduce_mul:
    return Instruction::Mul;

  // Bitwise reductions.
  case Intrinsic::experimental_vector_reduce_and:
    return Instruction::And;
  case Intrinsic::experimental_vector_reduce_or:
    return Instruction::Or;
  case Intrinsic::experimental_vector_reduce_xor:
    return Instruction::Xor;

  default:
    break;
  }
  llvm_unreachable("Unexpected ID");
}
|
|
|
|
|
|
|
|
/// Returns the min/max recurrence kind matching the reduction intrinsic
/// \p ID. Every ID other than the six min/max reductions listed below yields
/// MRK_Invalid.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  using RD = RecurrenceDescriptor;

  switch (ID) {
  // Signed integer.
  case Intrinsic::experimental_vector_reduce_smin:
    return RD::MRK_SIntMin;
  case Intrinsic::experimental_vector_reduce_smax:
    return RD::MRK_SIntMax;

  // Unsigned integer.
  case Intrinsic::experimental_vector_reduce_umin:
    return RD::MRK_UIntMin;
  case Intrinsic::experimental_vector_reduce_umax:
    return RD::MRK_UIntMax;

  // Floating point.
  case Intrinsic::experimental_vector_reduce_fmin:
    return RD::MRK_FloatMin;
  case Intrinsic::experimental_vector_reduce_fmax:
    return RD::MRK_FloatMax;

  default:
    break;
  }
  return RD::MRK_Invalid;
}
|
|
|
|
|
|
|
|
/// Expands the reduction intrinsic calls in \p F for which
/// \p TTI->shouldExpandReduction() returns true, replacing each expanded call
/// with the value produced by getOrderedReduction() or getShuffleReduction().
///
/// Calls that would need the shuffle-based expansion are left untouched when
/// the vector operand does not have a power-of-two number of elements, and
/// fmax/fmin calls are additionally left untouched unless they are 'fast'.
///
/// \returns true if at least one intrinsic call was replaced and erased.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  // Collect the candidates first; the calls are erased in the second loop,
  // after the walk over the function's instructions has finished.
  SmallVector<IntrinsicInst *, 4> Worklist;
  for (Instruction &Inst : instructions(F)) {
    auto *Call = dyn_cast<IntrinsicInst>(&Inst);
    if (!Call)
      continue;

    switch (Call->getIntrinsicID()) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul:
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      if (TTI->shouldExpandReduction(Call))
        Worklist.push_back(Call);
      break;
    default:
      break;
    }
  }

  // The shuffle-based expansion is only attempted for vectors whose element
  // count is a power of two.
  auto HasPow2NumElts = [](Value *Vec) {
    unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
    return isPowerOf2_32(NumElts);
  };

  bool Changed = false;
  for (IntrinsicInst *Call : Worklist) {
    // Only FP math operators carry fast-math flags; everything else is
    // expanded with an empty flag set.
    FastMathFlags FMF;
    if (isa<FPMathOperator>(Call))
      FMF = Call->getFastMathFlags();

    const Intrinsic::ID ID = Call->getIntrinsicID();
    const RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);

    // New instructions are inserted right before the intrinsic call and
    // inherit its fast-math flags.
    IRBuilder<> Builder(Call);
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.setFastMathFlags(FMF);

    Value *Rdx = nullptr;
    switch (ID) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul: {
      Value *Acc = Call->getArgOperand(0);
      Value *Vec = Call->getArgOperand(1);
      const unsigned Opcode = getOpcode(ID);
      if (FMF.allowReassoc()) {
        if (!HasPow2NumElts(Vec))
          continue;

        // Reduce the vector with shuffles, then fold in the accumulator.
        Value *VecRdx = getShuffleReduction(Builder, Vec, Opcode, MRK);
        Rdx = Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(Opcode),
                                  Acc, VecRdx, "bin.rdx");
      } else {
        // Without the reassociation flag on the call this is an ordered
        // reduction, which can't be handled by generating a shuffle sequence.
        Rdx = getOrderedReduction(Builder, Acc, Vec, Opcode, MRK);
      }
      break;
    }
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin: {
      Value *Vec = Call->getArgOperand(0);
      if (!HasPow2NumElts(Vec))
        continue;

      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
      break;
    }
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin: {
      // FIXME: Only 'fast' reductions are expanded here because the
      // underlying code in createMinMaxOp() assumes that comparisons use
      // 'fast' semantics.
      Value *Vec = Call->getArgOperand(0);
      if (!(HasPow2NumElts(Vec) && FMF.isFast()))
        continue;

      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
      break;
    }
    default:
      llvm_unreachable("Unexpected intrinsic!");
    }

    // Swap the intrinsic call for the expanded value and drop the call.
    Call->replaceAllUsesWith(Rdx);
    Call->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
|
|
|
|
|
|
|
|
class ExpandReductions : public FunctionPass {
|
|
|
|
public:
|
|
|
|
static char ID;
|
|
|
|
ExpandReductions() : FunctionPass(ID) {
|
|
|
|
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
|
|
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
|
|
|
return expandReductions(F, TTI);
|
|
|
|
}
|
|
|
|
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
|
|
|
AU.setPreservesCFG();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
char ExpandReductions::ID;
|
|
|
|
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
|
|
|
|
"Expand reduction intrinsics", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
|
|
|
|
"Expand reduction intrinsics", false, false)
|
|
|
|
|
|
|
|
/// Creates a new instance of the legacy ExpandReductions function pass. The
/// returned object is allocated with `new`.
FunctionPass *llvm::createExpandReductionsPass() {
  FunctionPass *LegacyPass = new ExpandReductions();
  return LegacyPass;
}
|
|
|
|
|
|
|
|
/// New pass manager entry point: runs expandReductions() on \p F using the
/// TargetIRAnalysis result obtained from \p AM.
///
/// \returns PreservedAnalyses::all() when nothing was expanded; otherwise a
/// set that preserves only the CFG analyses.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  const bool MadeChange = expandReductions(F, &TTI);
  if (MadeChange) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}
|