[llvm][LV] Replace `unsigned VF` with `ElementCount VF` [NFCI]

Changes: * Change `ToVectorTy` to deal directly with `ElementCount` instances. * `VF == 1` replaced with `VF.isScalar()`. * `VF > 1` and `VF >=2` replaced with `VF.isVector()`. * `VF <=1` is replaced with `VF.isZero() || VF.isScalar()`. * Add `<` operator to `ElementCount` to be able to use `llvm::SmallSetVector<ElementCount, ...>`. * Bits and pieces around printing the ElementCount to string streams. * Added a static method to `ElementCount` to represent a scalar. To guarantee that this change is a NFC, `VF.Min` and asserts are used in the following places: 1. When it doesn't make sense to deal with the scalable property, for example: a. When computing unrolling factors. b. When shuffle masks are built for fixed width vector types In this cases, an assert(!VF.Scalable && "<mgs>") has been added to make sure we don't enter coepaths that don't make sense for scalable vectors. 2. When there is a conscious decision to use `FixedVectorType`. These uses of `FixedVectorType` will likely be removed in favour of `VectorType` once the vectorizer is generic enough to deal with both fixed vector types and scalable vector types. 3. When dealing with building constants out of the value of VF, for example when computing the vectorization `step`, or building vectors of indices. These operation _make sense_ for scalable vectors too, but changing the code in these places to be generic and make it work for scalable vectors is to be submitted in a separate patch, as it is a functional change. 4. When building the potential VFs in VPlan. Making the VPlan generic enough to handle scalable vectorization factors is a functional change that needs a separate patch. See for example `void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF)`. 5. The class `IntrinsicCostAttribute`: this class still uses `unsigned VF` as updating the field to use `ElementCount` woudl require changes that could result in changing the behavior of the compiler. Will be done in a separate patch. 7. When dealing with user input for forcing the vectorization factor. In this case, adding support for scalable vectorization is a functional change that migh require changes at command line. Differential Revision: https://reviews.llvm.org/D85794
2020-08-07 22:03:24 +00:00 · 2020-08-07 22:03:24 +00:00 · c8d2b065b9
parent aec12c1264
commit c8d2b065b9
9 changed files with 491 additions and 335 deletions
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@ -128,6 +128,11 @@ public:

  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                          unsigned Factor);
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
+                          ElementCount Factor)
+      : IntrinsicCostAttributes(Id, CI, Factor.Min) {
+    assert(!Factor.Scalable);
+  }

  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                          unsigned Factor, unsigned ScalarCost);
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@ -300,13 +300,17 @@ namespace Intrinsic {
 typedef unsigned ID;
 }

-/// A helper function for converting Scalar types to vector types.
-/// If the incoming type is void, we return void. If the VF is 1, we return
-/// the scalar type.
-inline Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
-  if (Scalar->isVoidTy() || VF == 1)
+/// A helper function for converting Scalar types to vector types. If
+/// the incoming type is void, we return void. If the EC represents a
+/// scalar, we return the scalar type.
+inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
+  if (Scalar->isVoidTy() || EC.isScalar())
    return Scalar;
-  return VectorType::get(Scalar, ElementCount::get(VF, isScalable));
+  return VectorType::get(Scalar, EC);
+}
+
+inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
+  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
 }

 /// Identify if the intrinsic is trivially vectorizable.
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@ -21,6 +21,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <algorithm>
 #include <cstdint>
@ -434,6 +435,7 @@ public:
    Argument(StringRef Key, unsigned N);
    Argument(StringRef Key, unsigned long N);
    Argument(StringRef Key, unsigned long long N);
+    Argument(StringRef Key, ElementCount EC);
    Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {}
    Argument(StringRef Key, DebugLoc dl);
  };
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@ -67,8 +67,33 @@ public:
  static ElementCount get(unsigned Min, bool Scalable) {
    return {Min, Scalable};
  }
+
+  /// Printing function.
+  void print(raw_ostream &OS) const {
+    if (Scalable)
+      OS << "vscale x ";
+    OS << Min;
+  }
+  /// Counting predicates.
+  ///
+  /// Notice that Min = 1 and Scalable = true is considered more than
+  /// one element.
+  ///
+  ///@{ No elements..
+  bool isZero() const { return Min == 0; }
+  /// Exactly one element.
+  bool isScalar() const { return !Scalable && Min == 1; }
+  /// One or more elements.
+  bool isVector() const { return (Scalable && Min != 0) || Min > 1; }
+  ///@}
 };

+/// Stream operator function for `ElementCount`.
+inline raw_ostream &operator<<(raw_ostream &OS, const ElementCount &EC) {
+  EC.print(OS);
+  return OS;
+}
+
 // This class is used to represent the size of types. If the type is of fixed
 // size, it will represent the exact size. If the type is a scalable vector,
 // it will represent the known minimum size.
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@ -213,6 +213,13 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
                                                   unsigned long long N)
    : Key(std::string(Key)), Val(utostr(N)) {}

+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
+                                                   ElementCount EC)
+    : Key(std::string(Key)) {
+  raw_string_ostream OS(Val);
+  EC.print(OS);
+}
+
 DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, DebugLoc Loc)
    : Key(std::string(Key)), Loc(Loc) {
  if (Loc) {
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@ -172,12 +172,14 @@ public:
 /// Information about vectorization costs
 struct VectorizationFactor {
  // Vector width with best cost
-  unsigned Width;
+  ElementCount Width;
  // Cost of the loop with that width
  unsigned Cost;

  // Width 1 means no vectorization, cost 0 means uncomputed cost.
-  static VectorizationFactor Disabled() { return {1, 0}; }
+  static VectorizationFactor Disabled() {
+    return {ElementCount::getFixed(1), 0};
+  }

  bool operator==(const VectorizationFactor &rhs) const {
    return Width == rhs.Width && Cost == rhs.Cost;
@ -227,7 +229,10 @@ class LoopVectorizationPlanner {
  /// A builder used to construct the current plan.
  VPBuilder Builder;

-  unsigned BestVF = 0;
+  /// The best number of elements of the vector types used in the
+  /// transformed loop. BestVF = None means that vectorization is
+  /// disabled.
+  Optional<ElementCount> BestVF = None;
  unsigned BestUF = 0;

 public:
@ -242,14 +247,14 @@ public:

  /// Plan how to best vectorize, return the best VF and its cost, or None if
  /// vectorization and interleaving should be avoided up front.
-  Optional<VectorizationFactor> plan(unsigned UserVF, unsigned UserIC);
+  Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);

  /// Use the VPlan-native path to plan how to best vectorize, return the best
  /// VF and its cost.
-  VectorizationFactor planInVPlanNativePath(unsigned UserVF);
+  VectorizationFactor planInVPlanNativePath(ElementCount UserVF);

  /// Finalize the best decision and dispose of all other VPlans.
-  void setBestPlan(unsigned VF, unsigned UF);
+  void setBestPlan(ElementCount VF, unsigned UF);

  /// Generate the IR code for the body of the vectorized loop according to the
  /// best selected VPlan.
@ -264,7 +269,7 @@ public:
  /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
  /// returned value holds for the entire \p Range.
  static bool
-  getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate,
+  getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
                           VFRange &Range);

 protected:
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@ -300,7 +300,8 @@ void VPRegionBlock::execute(VPTransformState *State) {

  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
    State->Instance->Part = Part;
-    for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
+    assert(!State->VF.Scalable && "VF is assumed to be non scalable.");
+    for (unsigned Lane = 0, VF = State->VF.Min; Lane < VF; ++Lane) {
      State->Instance->Lane = Lane;
      // Visit the VPBlocks connected to \p this, starting from it.
      for (VPBlockBase *Block : RPOT) {
@ -387,7 +388,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
    Value *ScalarBTC = State.get(getOperand(1), {Part, 0});

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
-    auto *PredTy = FixedVectorType::get(Int1Ty, State.VF);
+    auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.Min);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarBTC->getType()},
        {VIVElem0, ScalarBTC}, nullptr, "active.lane.mask");
@ -838,14 +839,15 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.CanonicalIV;
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
-  auto VF = State.VF;
-  Value *VStart = VF == 1
-                      ? CanonicalIV
-                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
+  ElementCount VF = State.VF;
+  assert(!VF.Scalable && "the code following assumes non scalables ECs");
+  Value *VStart = VF.isScalar() ? CanonicalIV
+                                : Builder.CreateVectorSplat(VF.Min, CanonicalIV,
+                                                            "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    SmallVector<Constant *, 8> Indices;
-    for (unsigned Lane = 0; Lane < VF; ++Lane)
-      Indices.push_back(ConstantInt::get(STy, Part * VF + Lane));
+    for (unsigned Lane = 0; Lane < VF.Min; ++Lane)
+      Indices.push_back(ConstantInt::get(STy, Part * VF.Min + Lane));
    // If VF == 1, there is only one iteration in the loop above, thus the
    // element pushed back into Indices is ConstantInt::get(STy, Part)
    Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices);
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@ -115,7 +115,7 @@ private:

  /// The vectorization factor. Each entry in the scalar map contains UF x VF
  /// scalar values.
-  unsigned VF;
+  ElementCount VF;

  /// The vector and scalar map storage. We use std::map and not DenseMap
  /// because insertions to DenseMap invalidate its iterators.
@ -126,7 +126,7 @@ private:

 public:
  /// Construct an empty map with the given unroll and vectorization factors.
-  VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
+  VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}

  /// \return True if the map has any vector entry for \p Key.
  bool hasAnyVectorValue(Value *Key) const {
@ -151,12 +151,14 @@ public:
  /// \return True if the map has a scalar entry for \p Key and \p Instance.
  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
-    assert(Instance.Lane < VF && "Queried Scalar Lane is too large.");
+    assert(Instance.Lane < VF.Min && "Queried Scalar Lane is too large.");
+    assert(!VF.Scalable && "VF is assumed to be non scalable.");
+
    if (!hasAnyScalarValue(Key))
      return false;
    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
-    assert(Entry[Instance.Part].size() == VF &&
+    assert(Entry[Instance.Part].size() == VF.Min &&
           "ScalarParts has wrong dimensions.");
    return Entry[Instance.Part][Instance.Lane] != nullptr;
  }
@ -195,7 +197,7 @@ public:
      // TODO: Consider storing uniform values only per-part, as they occupy
      //       lane 0 only, keeping the other VF-1 redundant entries null.
      for (unsigned Part = 0; Part < UF; ++Part)
-        Entry[Part].resize(VF, nullptr);
+        Entry[Part].resize(VF.Min, nullptr);
      ScalarMapStorage[Key] = Entry;
    }
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
@ -234,14 +236,15 @@ struct VPCallback {
 /// VPTransformState holds information passed down when "executing" a VPlan,
 /// needed for generating the output IR.
 struct VPTransformState {
-  VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
-                   IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
-                   InnerLoopVectorizer *ILV, VPCallback &Callback)
+  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
+                   DominatorTree *DT, IRBuilder<> &Builder,
+                   VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
+                   VPCallback &Callback)
      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
        ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
-  unsigned VF;
+  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
@ -1583,7 +1586,7 @@ class VPlan {
  VPBlockBase *Entry;

  /// Holds the VFs applicable to this VPlan.
-  SmallSet<unsigned, 2> VFs;
+  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;
@ -1647,9 +1650,9 @@ public:
    return BackedgeTakenCount;
  }

-  void addVF(unsigned VF) { VFs.insert(VF); }
+  void addVF(ElementCount VF) { VFs.insert(VF); }

-  bool hasVF(unsigned VF) { return VFs.count(VF); }
+  bool hasVF(ElementCount VF) { return VFs.count(VF); }

  const std::string &getName() const { return Name; }