2017-05-10 17:42:49 +08:00
|
|
|
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the experimental intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/CodeGen/ExpandReductions.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
#include "llvm/CodeGen/Passes.h"
|
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/IRBuilder.h"
|
|
|
|
#include "llvm/IR/InstIterator.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/Pass.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
2017-05-10 17:42:49 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// Map a vector-reduction intrinsic ID to an instruction opcode.
///
/// Arithmetic and bitwise reductions yield the matching binary operator.
/// Integer min/max reductions yield Instruction::ICmp and floating-point
/// min/max reductions yield Instruction::FCmp. Any other ID reaches
/// llvm_unreachable.
unsigned getOpcode(Intrinsic::ID ID) {
  switch (ID) {
  // Integer min/max reductions.
  case Intrinsic::experimental_vector_reduce_smax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_umin:
    return Instruction::ICmp;

  // Floating-point min/max reductions.
  case Intrinsic::experimental_vector_reduce_fmax:
  case Intrinsic::experimental_vector_reduce_fmin:
    return Instruction::FCmp;

  // Floating-point arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_fadd:
    return Instruction::FAdd;
  case Intrinsic::experimental_vector_reduce_fmul:
    return Instruction::FMul;

  // Integer arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_add:
    return Instruction::Add;
  case Intrinsic::experimental_vector_reduce_mul:
    return Instruction::Mul;

  // Bitwise reductions.
  case Intrinsic::experimental_vector_reduce_and:
    return Instruction::And;
  case Intrinsic::experimental_vector_reduce_or:
    return Instruction::Or;
  case Intrinsic::experimental_vector_reduce_xor:
    return Instruction::Xor;

  default:
    llvm_unreachable("Unexpected ID");
  }
}
|
|
|
|
|
|
|
|
/// Map a min/max reduction intrinsic ID to the corresponding
/// RecurrenceDescriptor min/max recurrence kind.
///
/// \returns MRK_Invalid for every ID that is not one of the six min/max
/// reduction intrinsics listed below.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  using RD = RecurrenceDescriptor;

  switch (ID) {
  // Floating-point min/max.
  case Intrinsic::experimental_vector_reduce_fmax:
    return RD::MRK_FloatMax;
  case Intrinsic::experimental_vector_reduce_fmin:
    return RD::MRK_FloatMin;

  // Unsigned integer min/max.
  case Intrinsic::experimental_vector_reduce_umax:
    return RD::MRK_UIntMax;
  case Intrinsic::experimental_vector_reduce_umin:
    return RD::MRK_UIntMin;

  // Signed integer min/max.
  case Intrinsic::experimental_vector_reduce_smax:
    return RD::MRK_SIntMax;
  case Intrinsic::experimental_vector_reduce_smin:
    return RD::MRK_SIntMin;

  default:
    return RD::MRK_Invalid;
  }
}
|
|
|
|
|
|
|
|
/// Replace reduction intrinsic calls in \p F with the value produced by
/// getShuffleReduction().
///
/// A call is left untouched when:
///  - it is not one of the reduction intrinsics listed in the switch below;
///  - it is an fadd/fmul reduction whose fast-math flags are not 'fast'; or
///  - \p TTI->shouldExpandReduction() returns false for it.
///
/// \param F   function whose instructions are scanned and rewritten.
/// \param TTI target hook provider; it is dereferenced unconditionally once a
///            candidate call is found.
/// \returns true if at least one intrinsic call was replaced and erased.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  // Collect every intrinsic call first so that the erasures performed below
  // do not take place during the walk over F's instructions.
  SmallVector<IntrinsicInst *, 4> Calls;
  for (inst_iterator It = inst_begin(F), End = inst_end(F); It != End; ++It)
    if (auto *Call = dyn_cast<IntrinsicInst>(&*It))
      Calls.push_back(Call);

  bool MadeChange = false;
  for (IntrinsicInst *Call : Calls) {
    const auto ID = Call->getIntrinsicID();
    Value *Src = nullptr;
    auto MRK = RecurrenceDescriptor::MRK_Invalid;

    switch (ID) {
    default:
      // Not a reduction intrinsic handled here.
      continue;

    case Intrinsic::experimental_vector_reduce_fadd:
    case Intrinsic::experimental_vector_reduce_fmul:
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating this shuffle sequence.
      // TODO: Implement scalarization of ordered reductions here for targets
      // without native support.
      if (!Call->getFastMathFlags().isFast())
        continue;
      // For these two intrinsics the vector is read from operand 1; operand 0
      // is not read by this function.
      Src = Call->getArgOperand(1);
      break;

    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      Src = Call->getArgOperand(0);
      // MRK_Invalid for the non-min/max IDs in this group.
      MRK = getMRK(ID);
      break;
    }

    // Let the target veto the expansion.
    if (!TTI->shouldExpandReduction(Call))
      continue;

    // Emit the replacement sequence immediately before the call, then drop
    // the call itself.
    IRBuilder<> Builder(Call);
    Value *Expanded = getShuffleReduction(Builder, Src, getOpcode(ID), MRK);
    Call->replaceAllUsesWith(Expanded);
    Call->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
|
|
|
|
|
|
|
|
class ExpandReductions : public FunctionPass {
|
|
|
|
public:
|
|
|
|
static char ID;
|
|
|
|
ExpandReductions() : FunctionPass(ID) {
|
|
|
|
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
|
|
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
|
|
|
return expandReductions(F, TTI);
|
|
|
|
}
|
|
|
|
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
|
|
|
AU.setPreservesCFG();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
char ExpandReductions::ID;
|
|
|
|
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
|
|
|
|
"Expand reduction intrinsics", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
|
|
|
|
"Expand reduction intrinsics", false, false)
|
|
|
|
|
|
|
|
/// Heap-allocate a new ExpandReductions pass and return it as a FunctionPass
/// pointer.
FunctionPass *llvm::createExpandReductionsPass() {
  return new ExpandReductions;
}
|
|
|
|
|
|
|
|
/// Run expandReductions() on \p F with the TargetIRAnalysis result from
/// \p AM.
///
/// \returns PreservedAnalyses::all() when nothing was rewritten; otherwise a
/// set that preserves only the CFG analyses.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  const bool MadeChange = expandReductions(F, &TTI);
  if (!MadeChange)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}
|