diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9ccdc95e9ba4..2eea380c49bd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6903,19 +6903,22 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   // TODO: Can we put this a common method for DAG?
   auto SkipRCCopy = [](SDValue V) {
     while (V->isMachineOpcode() &&
-           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
+      // Bail out unless every copy in the chain has exactly one user.
+      if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
+        return SDValue();
       V = V->getOperand(0);
-    return V;
+    }
+    return V.hasOneUse() ? V : SDValue();
   };
 
   SDValue VecOp = SkipRCCopy(N->getOperand(0));
-  if (!isLaneInsensitive(VecOp) || !VecOp.hasOneUse())
+  if (!VecOp || !isLaneInsensitive(VecOp))
     return;
 
   SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
           RHS = SkipRCCopy(VecOp.getOperand(1));
-  if (!LHS.hasOneUse() || !RHS.hasOneUse() || !isVSXSwap(LHS) ||
-      !isVSXSwap(RHS))
+  if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
     return;
 
   // These swaps may still have chain-uses here, count on dead code elimination
diff --git a/llvm/test/CodeGen/PowerPC/swap-reduction.ll b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
index eb1f5b728b03..a2e7176654f8 100644
--- a/llvm/test/CodeGen/PowerPC/swap-reduction.ll
+++ b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
@@ -83,5 +83,31 @@ entry:
   ret i16 %2
 }
 
+define signext i32 @vecop_uses2([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+; CHECK-LABEL: vecop_uses2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
+; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-NEXT:    vmuluwm 2, 3, 2
+; CHECK-NEXT:    mffprwz 3, 0
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    stxvd2x 0, 0, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = bitcast [4 x i32]* %a to <4 x i32>*
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4 ; %1 is used twice below
+  %2 = bitcast [4 x i32]* %b to <4 x i32>*
+  %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  %4 = mul <4 x i32> %3, %1 ; first use of %1: operand of the vector multiply
+  %5 = bitcast [4 x i32]* %c to <4 x i32>*
+  store <4 x i32> %4, <4 x i32>* %5, align 4
+  %6 = extractelement <4 x i32> %1, i32 3 ; second use of %1, outside the multiply
+  ret i32 %6
+}
+
 declare <16 x i8> @llvm.ppc.altivec.vavgsb(<16 x i8>, <16 x i8>)
 declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)