[Reassociate] Don't convert add-like-or's into add's if they appear to be part of a load-combining idiom

As Wei Mi reported in post-commit review (https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html), teaching -reassociate about add-like-or's (70472f3) results in load widening patterns being broken apart and reassociated. For now, simply exclude any such `or` that appears to be the root of a load widening idiom from the or->add transformation. Note that the heuristic is greedy: it doesn't ensure that the loads can *actually* be widened into a single load.
2020-11-18 17:21:04 +03:00 · 2020-11-18 17:21:04 +03:00 · 34ff90ad5d
parent 1a3428163d
commit 34ff90ad5d
2 changed files with 66 additions and 8 deletions
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@ -920,8 +920,66 @@ static Value *NegateValue(Value *V, Instruction *BI,
  return NewNeg;
 }

+// Return true if `Or` looks like the root of a load widening reduction, i.e.
+// every node reachable from it is an `or`/`shl`/`zext`/`load` instruction
+// (only operand 0 of a `shl`/`zext` is followed). The check is greedy: it does
+// not ensure that the pattern can really be replaced with a single widened
+// load, only that it mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+  SmallVector<Instruction *, 8> Worklist;
+  SmallSet<Instruction *, 8> Visited;
+
+  auto Enqueue = [&](Value *V) {
+    auto *I = dyn_cast<Instruction>(V);
+    // Each node of an `or` reduction must be an instruction.
+    if (!I)
+      return false; // Not an instruction, so not part of an `or` load reduction.
+    // Only queue instructions we have never seen before (avoids re-visiting shared nodes).
+    if (Visited.insert(I).second)
+      Worklist.emplace_back(I);
+    return true; // V is an instruction; it is (or already was) queued for inspection.
+  };
+
+  if (!Enqueue(Or))
+    return false; // Not an `or` reduction pattern.
+
+  while (!Worklist.empty()) {
+    auto *I = Worklist.pop_back_val();
+
+    // Decide how to proceed based on the opcode of this node.
+    switch (I->getOpcode()) {
+    case Instruction::Or:
+      // Got an `or` node. That's fine, just queue all of its operands.
+      for (Value *Op : I->operands())
+        if (!Enqueue(Op))
+          return false; // Operand is not an instruction: not an `or` reduction pattern.
+      continue;
+
+    case Instruction::Shl:
+    case Instruction::ZExt:
+      // `shl`/`zext` nodes are fine; only their operand 0 is queued and checked.
+      if (!Enqueue(I->getOperand(0)))
+        return false; // Operand is not an instruction: not an `or` reduction pattern.
+      continue;
+
+    case Instruction::Load:
+      // A `load` node is a leaf of the walk: its operands are not followed.
+      continue;
+
+    default:        // Any other opcode.
+      return false; // Not an `or` reduction pattern.
+    }
+  }
+
+  return true;
+}
+
 /// Return true if it may be profitable to convert this (X|Y) into (X+Y).
 static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+  // If this `or` appears to be a part of a load widening reduction, ignore it.
+  if (isLoadCombineCandidate(Or))
+    return false;
+
  // Don't bother to convert this up unless either the LHS is an associable add
  // or subtract or mul or if this is only used by one of the above.
  // This is only a compile-time improvement, it is not needed for correctness!
--- a/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
+++ b/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
@ -10,8 +10,8 @@ define i16 @p0_i8_i8_i16(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 8
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
  %i = getelementptr inbounds i8, i8* %ptr, i64 1
@ -34,8 +34,8 @@ define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[I4]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I3]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I3]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
  %i = load i8, i8* %ptr
@ -58,8 +58,8 @@ define i16 @p2(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 9
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
  %i = getelementptr inbounds i8, i8* %ptr, i64 1
@ -79,8 +79,8 @@ define i16 @p3(i8* %ptr) {
 ; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
 ; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
 ; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
-; CHECK-NEXT:    [[I4:%.*]] = add i16 [[I2]], 42
-; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], [[I3]]
+; CHECK-NEXT:    [[I4:%.*]] = or i16 [[I3]], [[I2]]
+; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], 42
 ; CHECK-NEXT:    ret i16 [[I5]]
 ;
  %i = load i8, i8* %ptr