llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp

//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the experimental intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

namespace {

/// Translate a vector-reduction intrinsic ID into the instruction opcode used
/// when the reduction is expanded.
///
/// Arithmetic and bitwise reductions map to their binary operator. The min/max
/// reductions only map to the compare opcode (integer or floating point); the
/// exact min/max flavour is carried separately by getMRK().
///
/// Any ID that is not one of the handled reduction intrinsics is a programming
/// error and hits llvm_unreachable.
unsigned getOpcode(Intrinsic::ID ID) {
  switch (ID) {
  // Integer min/max reductions are expressed through integer compares.
  case Intrinsic::experimental_vector_reduce_umin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_smax:
    return Instruction::ICmp;

  // Floating-point min/max reductions are expressed through FP compares.
  case Intrinsic::experimental_vector_reduce_fmin:
  case Intrinsic::experimental_vector_reduce_fmax:
    return Instruction::FCmp;

  // Floating-point arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_fmul:
    return Instruction::FMul;
  case Intrinsic::experimental_vector_reduce_fadd:
    return Instruction::FAdd;

  // Integer arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_mul:
    return Instruction::Mul;
  case Intrinsic::experimental_vector_reduce_add:
    return Instruction::Add;

  // Bitwise reductions.
  case Intrinsic::experimental_vector_reduce_xor:
    return Instruction::Xor;
  case Intrinsic::experimental_vector_reduce_or:
    return Instruction::Or;
  case Intrinsic::experimental_vector_reduce_and:
    return Instruction::And;

  default:
    break;
  }
  llvm_unreachable("Unexpected ID");
}

/// Translate a min/max vector-reduction intrinsic ID into the matching
/// min/max recurrence kind.
///
/// \returns MRK_Invalid for every ID that is not one of the six min/max
/// reduction intrinsics listed below.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  using RD = RecurrenceDescriptor;

  // Start from "not a min/max reduction" and refine for the known IDs.
  RD::MinMaxRecurrenceKind Kind = RD::MRK_Invalid;
  switch (ID) {
  case Intrinsic::experimental_vector_reduce_fmin:
    Kind = RD::MRK_FloatMin;
    break;
  case Intrinsic::experimental_vector_reduce_fmax:
    Kind = RD::MRK_FloatMax;
    break;
  case Intrinsic::experimental_vector_reduce_umin:
    Kind = RD::MRK_UIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_umax:
    Kind = RD::MRK_UIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_smin:
    Kind = RD::MRK_SIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_smax:
    Kind = RD::MRK_SIntMax;
    break;
  default:
    break;
  }
  return Kind;
}

/// Expand the experimental vector-reduction intrinsic calls in \p F into
/// generic IR.
///
/// Each reduction call that \p TTI asks to have expanded is replaced by either
///  - an ordered reduction seeded with the accumulator operand (fadd/fmul
///    calls that do not carry the full set of fast-math flags), or
///  - a shuffle-based reduction of the vector operand (everything else),
/// after which the call is erased. Shuffle-based candidates whose vector
/// operand does not have a power-of-2 number of elements are left untouched.
///
/// \param F   Function to transform in place.
/// \param TTI Target hook provider; must not be null.
/// \returns true if at least one intrinsic call was replaced.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  bool Changed = false;

  // Gather the candidates up front: expansion inserts new instructions and
  // erases the call, so the instruction list must not be walked while it is
  // being rewritten.
  SmallVector<IntrinsicInst *, 4> Worklist;
  for (Instruction &I : instructions(F))
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      Worklist.push_back(II);

  for (auto *II : Worklist) {
    // New instructions are inserted immediately before the call.
    IRBuilder<> Builder(II);
    bool IsOrdered = false;
    Value *Acc = nullptr; // Only consumed by the ordered expansion.
    Value *Vec = nullptr;
    auto ID = II->getIntrinsicID();
    auto MRK = RecurrenceDescriptor::MRK_Invalid;
    switch (ID) {
    case Intrinsic::experimental_vector_reduce_fadd:
    case Intrinsic::experimental_vector_reduce_fmul:
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating a shuffle sequence.
      if (!II->getFastMathFlags().isFast())
        IsOrdered = true;
      Acc = II->getArgOperand(0);
      Vec = II->getArgOperand(1);
      break;
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      Vec = II->getArgOperand(0);
      MRK = getMRK(ID);
      break;
    default:
      // Not a reduction intrinsic; nothing to do.
      continue;
    }
    if (!TTI->shouldExpandReduction(II))
      continue;

    // The shuffle expansion halves the vector every round and therefore only
    // supports power-of-2 element counts. Leave other widths unexpanded rather
    // than tripping the assertion in getShuffleReduction (or emitting a bogus
    // sequence in builds without assertions). The ordered expansion has no
    // such restriction.
    if (!IsOrdered && !isPowerOf2_32(Vec->getType()->getVectorNumElements()))
      continue;

    // Propagate FMF using the builder.
    FastMathFlags FMF =
        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.setFastMathFlags(FMF);
    Value *Rdx =
        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
    II->replaceAllUsesWith(Rdx);
    II->eraseFromParent();
    Changed = true;
  }
  return Changed;
}

class ExpandReductions : public FunctionPass {
public:
  static char ID;
  ExpandReductions() : FunctionPass(ID) {
    initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    return expandReductions(F, TTI);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

// Out-of-class definition of the static pass identifier that the constructor
// hands to FunctionPass. It has static storage, so it is zero-initialized.
char ExpandReductions::ID;

// Pass registration: the pass is registered under the argument string
// "expand-reductions" with the description "Expand reduction intrinsics", and
// TargetTransformInfoWrapperPass is declared as a dependency so that it is
// initialized together with this pass.
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
                      "Expand reduction intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
                    "Expand reduction intrinsics", false, false)

/// Factory for the legacy pass-manager flavour of the pass.
///
/// \returns a newly heap-allocated ExpandReductions instance.
FunctionPass *llvm::createExpandReductionsPass() {
  FunctionPass *NewPass = new ExpandReductions();
  return NewPass;
}

/// New pass-manager entry point: expands the reduction intrinsics in \p F
/// using the TargetIRAnalysis result obtained from \p AM.
///
/// \returns all analyses preserved when nothing was changed; otherwise only
/// the CFG analyses set is marked as preserved.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const auto &TargetInfo = AM.getResult<TargetIRAnalysis>(F);
  const bool MadeChange = expandReductions(F, &TargetInfo);

  if (MadeChange) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//`
			`//`
Update the file headers across all of the LLVM projects in the monorepo to reflect the new license. We understand that people may be surprised that we're moving the header entirely to discuss the new license. We checked this carefully with the Foundation's lawyer and we believe this is the correct approach. Essentially, all code in the project is now made available by the LLVM project under our new license, so you will see that the license headers include that license only. Some of our contributors have contributed code under our old license, and accordingly, we have retained a copy of our old license notice in the top-level files in each project and repository. llvm-svn: 351636 2019-01-19 16:50:56 +08:00			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This pass implements IR expansion for reduction intrinsics, allowing targets`
			`// to enable the experimental intrinsics until just before codegen.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "llvm/CodeGen/ExpandReductions.h"`
Sort the remaining #include lines in include/... and lib/.... I did this a long time ago with a janky python script, but now clang-format has built-in support for this. I fed clang-format every line with a #include and let it re-sort things according to the precise LLVM rules for include ordering baked into clang-format these days. I've reverted a number of files where the results of sorting includes isn't healthy. Either places where we have legacy code relying on particular include ordering (where possible, I'll fix these separately) or where we have particular formatting around #include lines that I didn't want to disturb in this patch. This patch is entirely mechanical. If you get merge conflicts or anything, just ignore the changes in this patch and run clang-format over your #include lines in the files. Sorry for any noise here, but it is important to keep these things stable. I was seeing an increasing number of patches with irrelevant re-ordering of #include lines because clang-format was used. This patch at least isolates that churn, makes it easy to skip when resolving conflicts, and gets us to a clean baseline (again). llvm-svn: 304787 2017-06-06 19:49:48 +08:00			`#include "llvm/Analysis/TargetTransformInfo.h"`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`#include "llvm/CodeGen/Passes.h"`
			`#include "llvm/IR/Function.h"`
			`#include "llvm/IR/IRBuilder.h"`
			`#include "llvm/IR/InstIterator.h"`
			`#include "llvm/IR/IntrinsicInst.h"`
Sort the remaining #include lines in include/... and lib/.... I did this a long time ago with a janky python script, but now clang-format has built-in support for this. I fed clang-format every line with a #include and let it re-sort things according to the precise LLVM rules for include ordering baked into clang-format these days. I've reverted a number of files where the results of sorting includes isn't healthy. Either places where we have legacy code relying on particular include ordering (where possible, I'll fix these separately) or where we have particular formatting around #include lines that I didn't want to disturb in this patch. This patch is entirely mechanical. If you get merge conflicts or anything, just ignore the changes in this patch and run clang-format over your #include lines in the files. Sorry for any noise here, but it is important to keep these things stable. I was seeing an increasing number of patches with irrelevant re-ordering of #include lines because clang-format was used. This patch at least isolates that churn, makes it easy to skip when resolving conflicts, and gets us to a clean baseline (again). llvm-svn: 304787 2017-06-06 19:49:48 +08:00			`#include "llvm/IR/Intrinsics.h"`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`#include "llvm/IR/Module.h"`
			`#include "llvm/Pass.h"`
Sort the remaining #include lines in include/... and lib/.... I did this a long time ago with a janky python script, but now clang-format has built-in support for this. I fed clang-format every line with a #include and let it re-sort things according to the precise LLVM rules for include ordering baked into clang-format these days. I've reverted a number of files where the results of sorting includes isn't healthy. Either places where we have legacy code relying on particular include ordering (where possible, I'll fix these separately) or where we have particular formatting around #include lines that I didn't want to disturb in this patch. This patch is entirely mechanical. If you get merge conflicts or anything, just ignore the changes in this patch and run clang-format over your #include lines in the files. Sorry for any noise here, but it is important to keep these things stable. I was seeing an increasing number of patches with irrelevant re-ordering of #include lines because clang-format was used. This patch at least isolates that churn, makes it easy to skip when resolving conflicts, and gets us to a clean baseline (again). llvm-svn: 304787 2017-06-06 19:49:48 +08:00			`#include "llvm/Transforms/Utils/LoopUtils.h"`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00
			`using namespace llvm;`

			`namespace {`

			`unsigned getOpcode(Intrinsic::ID ID) {`
			`switch (ID) {`
			`case Intrinsic::experimental_vector_reduce_fadd:`
			`return Instruction::FAdd;`
			`case Intrinsic::experimental_vector_reduce_fmul:`
			`return Instruction::FMul;`
			`case Intrinsic::experimental_vector_reduce_add:`
			`return Instruction::Add;`
			`case Intrinsic::experimental_vector_reduce_mul:`
			`return Instruction::Mul;`
			`case Intrinsic::experimental_vector_reduce_and:`
			`return Instruction::And;`
			`case Intrinsic::experimental_vector_reduce_or:`
			`return Instruction::Or;`
			`case Intrinsic::experimental_vector_reduce_xor:`
			`return Instruction::Xor;`
			`case Intrinsic::experimental_vector_reduce_smax:`
			`case Intrinsic::experimental_vector_reduce_smin:`
			`case Intrinsic::experimental_vector_reduce_umax:`
			`case Intrinsic::experimental_vector_reduce_umin:`
			`return Instruction::ICmp;`
			`case Intrinsic::experimental_vector_reduce_fmax:`
			`case Intrinsic::experimental_vector_reduce_fmin:`
			`return Instruction::FCmp;`
			`default:`
			`llvm_unreachable("Unexpected ID");`
			`}`
			`}`

			`RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {`
			`switch (ID) {`
			`case Intrinsic::experimental_vector_reduce_smax:`
			`return RecurrenceDescriptor::MRK_SIntMax;`
			`case Intrinsic::experimental_vector_reduce_smin:`
			`return RecurrenceDescriptor::MRK_SIntMin;`
			`case Intrinsic::experimental_vector_reduce_umax:`
			`return RecurrenceDescriptor::MRK_UIntMax;`
			`case Intrinsic::experimental_vector_reduce_umin:`
			`return RecurrenceDescriptor::MRK_UIntMin;`
			`case Intrinsic::experimental_vector_reduce_fmax:`
			`return RecurrenceDescriptor::MRK_FloatMax;`
			`case Intrinsic::experimental_vector_reduce_fmin:`
			`return RecurrenceDescriptor::MRK_FloatMin;`
			`default:`
			`return RecurrenceDescriptor::MRK_Invalid;`
			`}`
			`}`

			`bool expandReductions(Function &F, const TargetTransformInfo *TTI) {`
			`bool Changed = false;`
Support generic expansion of ordered vector reduction (PR36732) Without the fast math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsic expansions must be expanded in order. This patch scalarizes the reduction, applying the accumulator at the start of the sequence: ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[NumElts-1] Differential Revision: https://reviews.llvm.org/D45366 llvm-svn: 329585 2018-04-09 23:44:20 +08:00			`SmallVector<IntrinsicInst *, 4> Worklist;`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)`
			`if (auto II = dyn_cast<IntrinsicInst>(&*I))`
			`Worklist.push_back(II);`

			`for (auto *II : Worklist) {`
			`IRBuilder<> Builder(II);`
Support generic expansion of ordered vector reduction (PR36732) Without the fast math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsic expansions must be expanded in order. This patch scalarizes the reduction, applying the accumulator at the start of the sequence: ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[NumElts-1] Differential Revision: https://reviews.llvm.org/D45366 llvm-svn: 329585 2018-04-09 23:44:20 +08:00			`bool IsOrdered = false;`
			`Value *Acc = nullptr;`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`Value *Vec = nullptr;`
			`auto ID = II->getIntrinsicID();`
			`auto MRK = RecurrenceDescriptor::MRK_Invalid;`
			`switch (ID) {`
			`case Intrinsic::experimental_vector_reduce_fadd:`
			`case Intrinsic::experimental_vector_reduce_fmul:`
			`// FMFs must be attached to the call, otherwise it's an ordered reduction`
Support generic expansion of ordered vector reduction (PR36732) Without the fast math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsic expansions must be expanded in order. This patch scalarizes the reduction, applying the accumulator at the start of the sequence: ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[NumElts-1] Differential Revision: https://reviews.llvm.org/D45366 llvm-svn: 329585 2018-04-09 23:44:20 +08:00			`// and it can't be handled by generating a shuffle sequence.`
[IR] redefine 'UnsafeAlgebra' / 'reassoc' fast-math-flags and add 'trans' fast-math-flag As discussed on llvm-dev: http://lists.llvm.org/pipermail/llvm-dev/2016-November/107104.html and again more recently: http://lists.llvm.org/pipermail/llvm-dev/2017-October/118118.html ...this is a step in cleaning up our fast-math-flags implementation in IR to better match the capabilities of both clang's user-visible flags and the backend's flags for SDNode. As proposed in the above threads, we're replacing the 'UnsafeAlgebra' bit (which had the 'umbrella' meaning that all flags are set) with a new bit that only applies to algebraic reassociation - 'AllowReassoc'. We're also adding a bit to allow approximations for library functions called 'ApproxFunc' (this was initially proposed as 'libm' or similar). ...and we're out of bits. 7 bits ought to be enough for anyone, right? :) FWIW, I did look at getting this out of SubclassOptionalData via SubclassData (spacious 16-bits), but that's apparently already used for other purposes. Also, I don't think we can just add a field to FPMathOperator because Operator is not intended to be instantiated. We'll defer movement of FMF to another day. We keep the 'fast' keyword. I thought about removing that, but seeing IR like this: %f.fast = fadd reassoc nnan ninf nsz arcp contract afn float %op1, %op2 ...made me think we want to keep the shortcut synonym. Finally, this change is binary incompatible with existing IR as seen in the compatibility tests. This statement: "Newer releases can ignore features from older releases, but they cannot miscompile them. For example, if nsw is ever replaced with something else, dropping it would be a valid way to upgrade the IR." ( http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility ) ...provides the flexibility we want to make this change without requiring a new IR version. Ie, we're not loosening the FP strictness of existing IR. 
At worst, we will fail to optimize some previously 'fast' code because it's no longer recognized as 'fast'. This should get fixed as we audit/squash all of the uses of 'isFast()'. Note: an inter-dependent clang commit to use the new API name should closely follow commit. Differential Revision: https://reviews.llvm.org/D39304 llvm-svn: 317488 2017-11-07 00:27:15 +08:00			`if (!II->getFastMathFlags().isFast())`
Support generic expansion of ordered vector reduction (PR36732) Without the fast math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsic expansions must be expanded in order. This patch scalarizes the reduction, applying the accumulator at the start of the sequence: ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[NumElts-1] Differential Revision: https://reviews.llvm.org/D45366 llvm-svn: 329585 2018-04-09 23:44:20 +08:00			`IsOrdered = true;`
			`Acc = II->getArgOperand(0);`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`Vec = II->getArgOperand(1);`
			`break;`
			`case Intrinsic::experimental_vector_reduce_add:`
			`case Intrinsic::experimental_vector_reduce_mul:`
			`case Intrinsic::experimental_vector_reduce_and:`
			`case Intrinsic::experimental_vector_reduce_or:`
			`case Intrinsic::experimental_vector_reduce_xor:`
			`case Intrinsic::experimental_vector_reduce_smax:`
			`case Intrinsic::experimental_vector_reduce_smin:`
			`case Intrinsic::experimental_vector_reduce_umax:`
			`case Intrinsic::experimental_vector_reduce_umin:`
			`case Intrinsic::experimental_vector_reduce_fmax:`
			`case Intrinsic::experimental_vector_reduce_fmin:`
			`Vec = II->getArgOperand(0);`
			`MRK = getMRK(ID);`
			`break;`
			`default:`
			`continue;`
			`}`
			`if (!TTI->shouldExpandReduction(II))`
			`continue;`
[LoopUtils][SLPVectorizer] clean up management of fast-math-flags Instead of passing around fast-math-flags as a parameter, we can set those using an IRBuilder guard object. This is no-functional-change-intended. The motivation is to eventually fix the vectorizers to use and set the correct fast-math-flags for reductions. Examples of that not behaving as expected are: https://bugs.llvm.org/show_bug.cgi?id=23116 (should be able to reduce with less than 'fast') https://bugs.llvm.org/show_bug.cgi?id=35538 (possible miscompile for -0.0) D61802 (should be able to reduce with IR-level FMF) Differential Revision: https://reviews.llvm.org/D62272 llvm-svn: 362612 2019-06-05 22:58:04 +08:00			`// Propagate FMF using the builder.`
Reland "Relax constraints for reduction vectorization" Change from original commit: move test (that uses an X86 triple) into the X86 subdirectory. Original description: Gating vectorizing reductions on all fastmath flags seems unnecessary; `reassoc` should be sufficient. Reviewers: tvvikram, mkuper, kristof.beyls, sdesmalen, Ayal Reviewed By: sdesmalen Subscribers: dcaballe, huntergr, jmolloy, mcrosier, jlebar, bixia, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57728 llvm-svn: 355889 2019-03-12 09:31:44 +08:00			`FastMathFlags FMF =`
			`isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};`
[LoopUtils][SLPVectorizer] clean up management of fast-math-flags Instead of passing around fast-math-flags as a parameter, we can set those using an IRBuilder guard object. This is no-functional-change-intended. The motivation is to eventually fix the vectorizers to use and set the correct fast-math-flags for reductions. Examples of that not behaving as expected are: https://bugs.llvm.org/show_bug.cgi?id=23116 (should be able to reduce with less than 'fast') https://bugs.llvm.org/show_bug.cgi?id=35538 (possible miscompile for -0.0) D61802 (should be able to reduce with IR-level FMF) Differential Revision: https://reviews.llvm.org/D62272 llvm-svn: 362612 2019-06-05 22:58:04 +08:00			`IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);`
			`Builder.setFastMathFlags(FMF);`
Support generic expansion of ordered vector reduction (PR36732) Without the fast math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsic expansions must be expanded in order. This patch scalarizes the reduction, applying the accumulator at the start of the sequence: ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[NumElts-1] Differential Revision: https://reviews.llvm.org/D45366 llvm-svn: 329585 2018-04-09 23:44:20 +08:00			`Value *Rdx =`
			`IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)`
[LoopUtils][SLPVectorizer] clean up management of fast-math-flags Instead of passing around fast-math-flags as a parameter, we can set those using an IRBuilder guard object. This is no-functional-change-intended. The motivation is to eventually fix the vectorizers to use and set the correct fast-math-flags for reductions. Examples of that not behaving as expected are: https://bugs.llvm.org/show_bug.cgi?id=23116 (should be able to reduce with less than 'fast') https://bugs.llvm.org/show_bug.cgi?id=35538 (possible miscompile for -0.0) D61802 (should be able to reduce with IR-level FMF) Differential Revision: https://reviews.llvm.org/D62272 llvm-svn: 362612 2019-06-05 22:58:04 +08:00			`: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);`
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shuffevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631 2017-05-10 17:42:49 +08:00			`II->replaceAllUsesWith(Rdx);`
			`II->eraseFromParent();`
			`Changed = true;`
			`}`
			`return Changed;`
			`}`

			`class ExpandReductions : public FunctionPass {`
			`public:`
			`static char ID;`
			`ExpandReductions() : FunctionPass(ID) {`
			`initializeExpandReductionsPass(*PassRegistry::getPassRegistry());`
			`}`

			`bool runOnFunction(Function &F) override {`
			`const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);`
			`return expandReductions(F, TTI);`
			`}`

			`void getAnalysisUsage(AnalysisUsage &AU) const override {`
			`AU.addRequired<TargetTransformInfoWrapperPass>();`
			`AU.setPreservesCFG();`
			`}`
			`};`
			`}`

			`char ExpandReductions::ID;`
			`INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",`
			`"Expand reduction intrinsics", false, false)`
			`INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)`
			`INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",`
			`"Expand reduction intrinsics", false, false)`

			`FunctionPass *llvm::createExpandReductionsPass() {`
			`return new ExpandReductions();`
			`}`

			`PreservedAnalyses ExpandReductionsPass::run(Function &F,`
			`FunctionAnalysisManager &AM) {`
			`const auto &TTI = AM.getResult<TargetIRAnalysis>(F);`
			`if (!expandReductions(F, &TTI))`
			`return PreservedAnalyses::all();`
			`PreservedAnalyses PA;`
			`PA.preserveSet<CFGAnalyses>();`
			`return PA;`
			`}`