forked from OSchip/llvm-project
[PowerPC] Fix use check of swap-reduction
This fixes swap reduction in DAGISel for cases where a COPY_TO_REGCLASS node has multiple uses: SkipRCCopy now returns an empty SDValue when any copy in the chain, or the value it finally resolves to, is not single-use, and its callers bail out of the reduction in that case.
This commit is contained in:
parent 030ac786d4
commit 033c9c2552
@@ -6903,19 +6903,22 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   // TODO: Can we put this a common method for DAG?
   auto SkipRCCopy = [](SDValue V) {
     while (V->isMachineOpcode() &&
-           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
+      // All values in the chain should have single use.
+      if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
+        return SDValue();
       V = V->getOperand(0);
-    return V;
+    }
+    return V.hasOneUse() ? V : SDValue();
   };
 
   SDValue VecOp = SkipRCCopy(N->getOperand(0));
-  if (!isLaneInsensitive(VecOp) || !VecOp.hasOneUse())
+  if (!VecOp || !isLaneInsensitive(VecOp))
     return;
 
   SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
           RHS = SkipRCCopy(VecOp.getOperand(1));
-  if (!LHS.hasOneUse() || !RHS.hasOneUse() || !isVSXSwap(LHS) ||
-      !isVSXSwap(RHS))
+  if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
     return;
 
   // These swaps may still have chain-uses here, count on dead code elimination
@@ -83,5 +83,31 @@ entry:
   ret i16 %2
 }
 
+define signext i32 @vecop_uses2([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+; CHECK-LABEL: vecop_uses2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
+; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-NEXT:    vmuluwm 2, 3, 2
+; CHECK-NEXT:    mffprwz 3, 0
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    stxvd2x 0, 0, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = bitcast [4 x i32]* %a to <4 x i32>*
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast [4 x i32]* %b to <4 x i32>*
+  %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  %4 = mul <4 x i32> %3, %1
+  %5 = bitcast [4 x i32]* %c to <4 x i32>*
+  store <4 x i32> %4, <4 x i32>* %5, align 4
+  %6 = extractelement <4 x i32> %1, i32 3
+  ret i32 %6
+}
+
 declare <16 x i8> @llvm.ppc.altivec.vavgsb(<16 x i8>, <16 x i8>)
 declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
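The heart of the change is the stricter use check in SkipRCCopy in the first hunk: every COPY_TO_REGCLASS looked through, and the value the chain resolves to, must be single-use, otherwise the reduction is skipped. As a rough standalone sketch of that walk-and-check shape (not LLVM code: Node, kCopyToRegClass, and skipRCCopy below are hypothetical stand-ins for SelectionDAG nodes and their opcodes):

// Rough standalone model of the patched use check; all names here are
// illustrative stand-ins, not LLVM's SelectionDAG API.
#include <cstdio>

struct Node {
  int Opcode;      // hypothetical opcode tag
  int NumUses;     // number of users of this value
  Node *Operand0;  // the value a copy node forwards
};

constexpr int kCopyToRegClass = 1;  // stands in for TargetOpcode::COPY_TO_REGCLASS
constexpr int kOtherOp = 2;

// Look through a chain of register-class copies, but give up (return nullptr)
// if any copy in the chain, or the value it finally resolves to, is not
// single-use -- mirroring the single-use rule the patched SkipRCCopy enforces.
Node *skipRCCopy(Node *V) {
  while (V && V->Opcode == kCopyToRegClass) {
    if (V->NumUses != 1)
      return nullptr;  // a multi-use copy makes the swap reduction unsafe
    V = V->Operand0;
  }
  return (V && V->NumUses == 1) ? V : nullptr;
}

int main() {
  Node Loaded{kOtherOp, 2, nullptr};       // value with a second user
  Node Copy{kCopyToRegClass, 1, &Loaded};  // single-use copy of it
  std::printf("%s\n", skipRCCopy(&Copy) ? "reducible" : "skip reduction");
  return 0;
}

Run standalone, this prints "skip reduction" because the copied value has a second user. That is the situation exercised by the vecop_uses2 test above: the loaded vector %1 feeds both the multiply and the extractelement, so the swaps are kept, as the xxswapd instructions in the CHECK lines show.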