forked from OSchip/llvm-project
[SelectionDAG] Allow FindMemType to fail when widening loads & stores
This patch removes an internal failure found in FindMemType and "bubbles it up" to the users of that method: GenWidenVectorLoads and GenWidenVectorStores. FindMemType -- renamed findMemType -- now returns an optional value, returning None if no such type is found. Each of the aforementioned users now pre-calculates the list of types it will use to widen the memory access. If the type breakdown is not possible, they will signal a failure, at which point the compiler will crash as it does currently.

This patch is preparing the ground for alternative legalization strategies for vector loads and stores, such as using vector-predication versions of loads or stores.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D112000
This commit is contained in:
parent
3b285ff517
commit
8314a04ede
|
@ -983,10 +983,10 @@ private:
|
|||
LoadSDNode *LD, ISD::LoadExtType ExtType);
|
||||
|
||||
/// Helper function to generate a set of stores to store a widen vector into
|
||||
/// non-widen memory.
|
||||
/// non-widen memory. Returns true if successful, false otherwise.
|
||||
/// StChain: list of chains for the stores we have generated
|
||||
/// ST: store of a widen value
|
||||
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
|
||||
bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
|
||||
|
||||
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
|
||||
/// input vector must have the same element type as NVT.
|
||||
|
|
|
@ -4178,6 +4178,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
|
|||
else
|
||||
Result = GenWidenVectorLoads(LdChain, LD);
|
||||
|
||||
if (!Result)
|
||||
report_fatal_error("Unable to widen vector load");
|
||||
|
||||
// If we generate a single load, we can use that for the chain. Otherwise,
|
||||
// build a factor node to remember the multiple loads are independent and
|
||||
// chain to that.
|
||||
|
@ -5032,11 +5035,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
|
|||
return TLI.scalarizeVectorStore(ST, DAG);
|
||||
|
||||
SmallVector<SDValue, 16> StChain;
|
||||
GenWidenVectorStores(StChain, ST);
|
||||
if (!GenWidenVectorStores(StChain, ST))
|
||||
report_fatal_error("Unable to widen vector store");
|
||||
|
||||
if (StChain.size() == 1)
|
||||
return StChain[0];
|
||||
else
|
||||
|
||||
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
|
||||
}
|
||||
|
||||
|
@ -5288,7 +5292,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
|
|||
// Align: If 0, don't allow use of a wider type
|
||||
// WidenEx: If Align is not 0, the amount additional we can load/store from.
|
||||
|
||||
static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
|
||||
static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
|
||||
unsigned Width, EVT WidenVT,
|
||||
unsigned Align = 0, unsigned WidenEx = 0) {
|
||||
EVT WidenEltVT = WidenVT.getVectorElementType();
|
||||
|
@ -5345,9 +5349,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
|
|||
}
|
||||
}
|
||||
|
||||
// Using element-wise loads and stores for widening operations is not
|
||||
// supported for scalable vectors
|
||||
if (Scalable)
|
||||
report_fatal_error("Using element-wise loads and stores for widening "
|
||||
"operations is not supported for scalable vectors");
|
||||
return None;
|
||||
|
||||
return RetVT;
|
||||
}
|
||||
|
||||
|
@ -5411,29 +5417,59 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
|
|||
(!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
|
||||
|
||||
// Find the vector type that can load from.
|
||||
EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
|
||||
Optional<EVT> FirstVT =
|
||||
findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
|
||||
WidthDiff.getKnownMinSize());
|
||||
TypeSize NewVTWidth = NewVT.getSizeInBits();
|
||||
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
|
||||
|
||||
if (!FirstVT)
|
||||
return SDValue();
|
||||
|
||||
SmallVector<EVT, 8> MemVTs;
|
||||
TypeSize FirstVTWidth = FirstVT->getSizeInBits();
|
||||
|
||||
// Unless we're able to load in one instruction we must work out how to load
|
||||
// the remainder.
|
||||
if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
|
||||
Optional<EVT> NewVT = FirstVT;
|
||||
TypeSize RemainingWidth = LdWidth;
|
||||
TypeSize NewVTWidth = FirstVTWidth;
|
||||
do {
|
||||
RemainingWidth -= NewVTWidth;
|
||||
if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
|
||||
// The current type we are using is too large. Find a better size.
|
||||
NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
|
||||
LdAlign, WidthDiff.getKnownMinSize());
|
||||
if (!NewVT)
|
||||
return SDValue();
|
||||
NewVTWidth = NewVT->getSizeInBits();
|
||||
}
|
||||
MemVTs.push_back(*NewVT);
|
||||
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
|
||||
}
|
||||
|
||||
SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
|
||||
LD->getOriginalAlign(), MMOFlags, AAInfo);
|
||||
LdChain.push_back(LdOp.getValue(1));
|
||||
|
||||
// Check if we can load the element with one instruction.
|
||||
if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
|
||||
if (!NewVT.isVector()) {
|
||||
unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
|
||||
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
|
||||
if (MemVTs.empty()) {
|
||||
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
|
||||
if (!FirstVT->isVector()) {
|
||||
unsigned NumElts =
|
||||
WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
|
||||
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
|
||||
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
|
||||
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
|
||||
}
|
||||
if (NewVT == WidenVT)
|
||||
if (FirstVT == WidenVT)
|
||||
return LdOp;
|
||||
|
||||
// TODO: We don't currently have any tests that exercise this code path.
|
||||
assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
|
||||
unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
|
||||
assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
|
||||
unsigned NumConcat =
|
||||
WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
|
||||
SmallVector<SDValue, 16> ConcatOps(NumConcat);
|
||||
SDValue UndefVal = DAG.getUNDEF(NewVT);
|
||||
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
|
||||
ConcatOps[0] = LdOp;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
ConcatOps[i] = UndefVal;
|
||||
|
@ -5446,28 +5482,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
|
|||
|
||||
uint64_t ScaledOffset = 0;
|
||||
MachinePointerInfo MPI = LD->getPointerInfo();
|
||||
do {
|
||||
LdWidth -= NewVTWidth;
|
||||
IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
|
||||
|
||||
// First increment past the first load.
|
||||
IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
|
||||
&ScaledOffset);
|
||||
|
||||
if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
|
||||
// The current type we are using is too large. Find a better size.
|
||||
NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
|
||||
WidthDiff.getKnownMinSize());
|
||||
NewVTWidth = NewVT.getSizeInBits();
|
||||
}
|
||||
|
||||
for (EVT MemVT : MemVTs) {
|
||||
Align NewAlign = ScaledOffset == 0
|
||||
? LD->getOriginalAlign()
|
||||
: commonAlignment(LD->getAlign(), ScaledOffset);
|
||||
SDValue L =
|
||||
DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
|
||||
LdChain.push_back(L.getValue(1));
|
||||
DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
|
||||
|
||||
LdOps.push_back(L);
|
||||
LdOp = L;
|
||||
} while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
|
||||
LdChain.push_back(L.getValue(1));
|
||||
IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
|
||||
}
|
||||
|
||||
// Build the vector from the load operations.
|
||||
unsigned End = LdOps.size();
|
||||
|
@ -5589,7 +5619,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
|
|||
return DAG.getBuildVector(WidenVT, dl, Ops);
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
|
||||
bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
|
||||
StoreSDNode *ST) {
|
||||
// The strategy assumes that we can efficiently store power-of-two widths.
|
||||
// The routine chops the vector into the largest vector stores with the same
|
||||
|
@ -5615,9 +5645,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
|
|||
|
||||
MachinePointerInfo MPI = ST->getPointerInfo();
|
||||
uint64_t ScaledOffset = 0;
|
||||
|
||||
// A breakdown of how to widen this vector store. Each element of the vector
|
||||
// is a memory VT combined with the number of times it is to be stored to,
|
||||
// e.g., v5i32 -> {{v2i32,2},{i32,1}}
|
||||
SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
|
||||
|
||||
while (StWidth.isNonZero()) {
|
||||
// Find the largest vector type we can store with.
|
||||
EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
|
||||
Optional<EVT> NewVT =
|
||||
findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
|
||||
if (!NewVT)
|
||||
return false;
|
||||
MemVTs.push_back({*NewVT, 0});
|
||||
TypeSize NewVTWidth = NewVT->getSizeInBits();
|
||||
|
||||
do {
|
||||
StWidth -= NewVTWidth;
|
||||
MemVTs.back().second++;
|
||||
} while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
|
||||
}
|
||||
|
||||
for (const auto &Pair : MemVTs) {
|
||||
EVT NewVT = Pair.first;
|
||||
unsigned Count = Pair.second;
|
||||
TypeSize NewVTWidth = NewVT.getSizeInBits();
|
||||
|
||||
if (NewVT.isVector()) {
|
||||
|
@ -5632,12 +5683,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
|
|||
MMOFlags, AAInfo);
|
||||
StChain.push_back(PartStore);
|
||||
|
||||
StWidth -= NewVTWidth;
|
||||
Idx += NumVTElts;
|
||||
|
||||
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
|
||||
&ScaledOffset);
|
||||
} while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
|
||||
} while (--Count);
|
||||
} else {
|
||||
// Cast the vector to the scalar type we can store.
|
||||
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
|
||||
|
@ -5653,13 +5702,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
|
|||
MMOFlags, AAInfo);
|
||||
StChain.push_back(PartStore);
|
||||
|
||||
StWidth -= NewVTWidth;
|
||||
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
|
||||
} while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
|
||||
} while (--Count);
|
||||
// Restore index back to be relative to the original widen element type.
|
||||
Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
|
||||
|
|
Loading…
Reference in New Issue