diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py
index 930cabf478..47e3458c03 100644
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -57,12 +57,15 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra
         allreduce (Primitive): The communication operator for gradients.
         allreduce_filter (bool): When it is true, allreduce would apply.
         grad (Tensor): The gradient tensor before operation.
-        ps_parameter(Bool): Use parameter server or not.
+        ps_parameter (bool): Use parameter server or not.

     Returns:
         Tensor, the gradient tensor after operation.
     """
-    if not ps_parameter and allreduce_filter:
+    if ps_parameter:
+        return grad
+
+    if allreduce_filter:
         grad = allreduce(grad)
         if mean:
             degree = F.scalar_cast(degree, F.dtype(grad))
@@ -73,8 +76,8 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra
     return grad


-@reduce_opt.register("Number", "Bool", "Function", "Function", "Bool", "IndexedSlices")
-def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad):
+@reduce_opt.register("Number", "Bool", "Function", "Function", "Bool", "IndexedSlices", "Bool")
+def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter):
     """
     Apply allgather on gradient instead of allreduce for sparse feature.
     Allgather is a communication operation used for distributed deep learning.
@@ -86,10 +89,14 @@ def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce
         allreduce (Primitive): The communication operator for gradients.
         allreduce_filter (bool): When it is true, allgather would apply.
         grad (tuple): The indices, gradient tensor and tensor_shape before operation.
+        ps_parameter (bool): Use parameter server or not.

     Returns:
         IndexedSlices, the gradient after operation.
     """
+    if ps_parameter:
+        return grad
+
     if allreduce_filter:
         indices = allgather(grad.indices())
         dout = allgather(grad.values())
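For context, a minimal standalone sketch of the control flow this patch introduces in the dense-gradient path, with plain Python callables standing in for the MindSpore communication primitives (the names reduce_dense_grad and fake_allreduce are illustrative assumptions, not part of the patch): a gradient belonging to a parameter-server parameter is returned untouched, otherwise it is allreduced and, when mean is set, divided by degree.

# Sketch of the post-patch control flow in _tensors_allreduce (assumption:
# plain floats instead of Tensors, and a toy "allreduce" that adds a peer's
# contribution instead of calling a real collective primitive).
def reduce_dense_grad(degree, mean, allreduce_fn, allreduce_filter, grad, ps_parameter):
    # Parameter-server parameters push their gradients to the server
    # elsewhere, so they skip the collective entirely -- this is the
    # early return the patch adds.
    if ps_parameter:
        return grad

    if allreduce_filter:
        grad = allreduce_fn(grad)      # sum across devices
        if mean:
            grad = grad / degree       # average instead of sum
    return grad

# Toy usage with two "devices"; the peer's gradient contribution is 3.0.
fake_allreduce = lambda g: g + 3.0
print(reduce_dense_grad(2, True, fake_allreduce, True, 1.0, False))  # 2.0
print(reduce_dense_grad(2, True, fake_allreduce, True, 1.0, True))   # 1.0, bypassed

The sparse path in _tensors_allreduce_with_sparse follows the same early-return shape, except that allgather is applied to the indices and values of the IndexedSlices instead of an allreduce on a dense tensor.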