[Dominators] Simplify and optimize path compression used in link-eval forest.

Summary: * NodeToInfo[*] have been allocated so the addresses are stable. We can store them instead of NodePtr to save NumToNode lookups. * Nodes are traversed twice. Using `Visited` to check the traversal number is expensive and obscure. Just split the two traversals into two loops explicitly. * The check `VInInfo.DFSNum < LastLinked` is redundant as it is implied by `VInInfo->Parent < LastLinked` * VLabelInfo PLabelInfo are used to save a NodeToInfo lookup in the second traversal. Also add some comments explaining eval(). This shows a ~4.5% improvement (9.8444s -> 9.3996s) on perf stat -r 10 taskset -c 0 opt -passes=$(printf '%.0srequire<domtree>,invalidate<domtree>,' {1..1000})'require<domtree>' -disable-output sqlite-autoconf-3270100/sqlite3.bc Reviewers: kuhar, sanjoy, asbirlea Reviewed By: kuhar Subscribers: brzycki, NutshellySima, kristina, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58327 llvm-svn: 354433
2019-02-20 04:39:42 +00:00 · 2019-02-20 04:39:42 +00:00 · d990c2a9e2
parent b94dde7f9b
commit d990c2a9e2
1 changed files with 47 additions and 38 deletions
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@ -15,9 +15,12 @@
 ///   Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
 ///   ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
 ///
-/// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
-/// out that the theoretically slower O(n*log(n)) implementation is actually
-/// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
+/// Semi-NCA algorithm runs in O(n^2) worst-case time but usually slightly
+/// faster than Simple Lengauer-Tarjan in practice.
+///
+/// O(n^2) worst cases happen when the computation of nearest common ancestors
+/// requires O(n) average time, which is very unlikely in real world. If this
+/// ever turns out to be an issue, consider implementing a hybrid algorithm.
 ///
 /// The file uses the Depth Based Search algorithm to perform incremental
 /// updates (insertion and deletions). The implemented algorithm is based on
@ -254,42 +257,47 @@ struct SemiNCAInfo {
    return LastNum;
  }

-  NodePtr eval(NodePtr VIn, unsigned LastLinked) {
-    auto &VInInfo = NodeToInfo[VIn];
-    if (VInInfo.DFSNum < LastLinked)
-      return VIn;
+  // V is a predecessor of W. eval() returns V if V < W, otherwise the minimum
+  // of sdom(U), where U > W and there is a virtual forest path from U to V. The
+  // virtual forest consists of linked edges of processed vertices.
+  //
+  // We can follow Parent pointers (virtual forest edges) to determine the
+  // ancestor U with minimum sdom(U). But it is slow and thus we employ the path
+  // compression technique to speed up to O(m*log(n)). Theoretically the virtual
+  // forest can be organized as balanced trees to achieve almost linear
+  // O(m*alpha(m,n)) running time. But it requires two auxiliary arrays (Size
+  // and Child) and is unlikely to be faster than the simple implementation.
+  //
+  // For each vertex V, its Label points to the vertex with the minimal sdom(U)
+  // (Semi) in its path from V (included) to NodeToInfo[V].Parent (excluded).
+  NodePtr eval(NodePtr V, unsigned LastLinked,
+               SmallVectorImpl<InfoRec *> &Stack) {
+    InfoRec *VInfo = &NodeToInfo[V];
+    if (VInfo->Parent < LastLinked)
+      return VInfo->Label;

-    SmallVector<NodePtr, 32> Work;
-    SmallPtrSet<NodePtr, 32> Visited;
+    // Store ancestors except the last (root of a virtual tree) into a stack.
+    assert(Stack.empty());
+    do {
+      Stack.push_back(VInfo);
+      VInfo = &NodeToInfo[NumToNode[VInfo->Parent]];
+    } while (VInfo->Parent >= LastLinked);

-    if (VInInfo.Parent >= LastLinked)
-      Work.push_back(VIn);
-
-    while (!Work.empty()) {
-      NodePtr V = Work.back();
-      auto &VInfo = NodeToInfo[V];
-      NodePtr VAncestor = NumToNode[VInfo.Parent];
-
-      // Process Ancestor first
-      if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
-        Work.push_back(VAncestor);
-        continue;
-      }
-      Work.pop_back();
-
-      // Update VInfo based on Ancestor info
-      if (VInfo.Parent < LastLinked)
-        continue;
-
-      auto &VAInfo = NodeToInfo[VAncestor];
-      NodePtr VAncestorLabel = VAInfo.Label;
-      NodePtr VLabel = VInfo.Label;
-      if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi)
-        VInfo.Label = VAncestorLabel;
-      VInfo.Parent = VAInfo.Parent;
-    }
-
-    return VInInfo.Label;
+    // Path compression. Point each vertex's Parent to the root and update its
+    // Label if any of its ancestors (PInfo->Label) has a smaller Semi.
+    const InfoRec *PInfo = VInfo;
+    const InfoRec *PLabelInfo = &NodeToInfo[PInfo->Label];
+    do {
+      VInfo = Stack.pop_back_val();
+      VInfo->Parent = PInfo->Parent;
+      const InfoRec *VLabelInfo = &NodeToInfo[VInfo->Label];
+      if (PLabelInfo->Semi < VLabelInfo->Semi)
+        VInfo->Label = PInfo->Label;
+      else
+        PLabelInfo = VLabelInfo;
+      PInfo = VInfo;
+    } while (!Stack.empty());
+    return VInfo->Label;
  }

  // This function requires DFS to be run before calling it.
@ -303,6 +311,7 @@ struct SemiNCAInfo {
    }

    // Step #1: Calculate the semidominators of all vertices.
+    SmallVector<InfoRec *, 32> EvalStack;
    for (unsigned i = NextDFSNum - 1; i >= 2; --i) {
      NodePtr W = NumToNode[i];
      auto &WInfo = NodeToInfo[W];
@ -318,7 +327,7 @@ struct SemiNCAInfo {
        if (TN && TN->getLevel() < MinLevel)
          continue;

-        unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
+        unsigned SemiU = NodeToInfo[eval(N, i + 1, EvalStack)].Semi;
        if (SemiU < WInfo.Semi) WInfo.Semi = SemiU;
      }
    }