[GISel][KnownBits] Add a cache mechanism to speed up compile time

This patch adds a cache that is valid only for the duration of a call to getKnownBits. With such a short-lived cache, we avoid all the problems of cache invalidation while still getting the benefits of reusing the information we already computed. This cache is useful whenever an instruction occurs more than once in a chain of computation. E.g., given `v0 = G_ADD v1, v2` followed by `v3 = G_ADD v0, v1`, previously we would compute the known bits for v1, v2, v0, then v1 again, and finally v3. With this patch, we no longer have to recompute v1. NFC
2020-02-20 17:51:26 -08:00 · 2020-02-20 17:51:26 -08:00 · 618dec2aef
parent 9708279c72
commit 618dec2aef
2 changed files with 37 additions and 8 deletions
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@ -13,6 +13,7 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
 #define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H

+#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Register.h"
@ -32,6 +33,8 @@ class GISelKnownBits : public GISelChangeObserver {
  const TargetLowering &TL;
  const DataLayout &DL;
  unsigned MaxDepth;
+  /// Cache maintained during a computeKnownBits request.
+  SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;

 public:
  GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@ -69,7 +69,10 @@ KnownBits GISelKnownBits::getKnownBits(Register R) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+  // For now, we only maintain the cache during one request.
+  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
  computeKnownBitsImpl(R, Known, DemandedElts);
+  ComputeKnownBitsCache.clear();
  return Known;
 }

@ -85,6 +88,17 @@ APInt GISelKnownBits::getKnownZeroes(Register R) {

 APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

+static void dumpResult(const MachineInstr &MI, const KnownBits &Known,
+                       unsigned Depth) {
+  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
+         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
+         << (Known.Zero | Known.One).toString(16, false) << "\n"
+         << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false)
+         << "\n"
+         << "[" << Depth << "] One:  0x" << Known.One.toString(16, false)
+         << "\n";
+}
+
 void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
@ -102,6 +116,14 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
  }

  unsigned BitWidth = DstTy.getSizeInBits();
+  auto CacheEntry = ComputeKnownBitsCache.find(R);
+  if (CacheEntry != ComputeKnownBitsCache.end()) {
+    Known = CacheEntry->second;
+    LLVM_DEBUG(dbgs() << "Cache hit at ");
+    LLVM_DEBUG(dumpResult(MI, Known, Depth));
+    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
+    return;
+  }
  Known = KnownBits(BitWidth); // Don't know anything

  if (DstTy.isVector())
@ -137,6 +159,14 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
+    // Record in the cache that we know nothing for MI.
+    // This will get updated later and in the meantime, if we reach that
+    // phi again, because of a loop, we will cut the search thanks to this
+    // cache entry. When this happens this cache entry is actually accurate,
+    // thus we are not losing anything by doing that, because right now,
+    // the main analysis will reach the maximum depth without being able
+    // to fully analyze the phi.
+    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // PHI's operand are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
@ -374,14 +404,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
  }

  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-  LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "["
-                    << Depth << "] Computed for: " << MI << "[" << Depth
-                    << "] Known: 0x"
-                    << (Known.Zero | Known.One).toString(16, false) << "\n"
-                    << "[" << Depth << "] Zero: 0x"
-                    << Known.Zero.toString(16, false) << "\n"
-                    << "[" << Depth << "] One:  0x"
-                    << Known.One.toString(16, false) << "\n");
+  LLVM_DEBUG(dumpResult(MI, Known, Depth));
+
+  // Update the cache.
+  ComputeKnownBitsCache[R] = Known;
 }

 unsigned GISelKnownBits::computeNumSignBits(Register R,