!16629 Binary_cross_entropy op supports optional weight input

From: @zuochuanyong Reviewed-by: @zhoufeng54,@liangchenghui Signed-off-by: @liangchenghui
2021-05-25 09:26:00 +08:00 · 2021-05-25 09:26:00 +08:00 · e1803a0938
parent f64bc04f95 fc06d0562c
commit e1803a0938
11 changed files with 193 additions and 37 deletions
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_cpu_kernel.cc
@ -17,6 +17,8 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kBceInputNumWithWeight = 3;
+
 template <typename T>
 void BinaryCrossEntropyCpuKernel::LaunchToScalar(const int &input_size, const int &reduction, T *loss, T *tmp_loss) {
  if (input_size % 2 == 1) {
@ -44,24 +46,37 @@ void BinaryCrossEntropyCpuKernel::Launchkernel(const std::vector<AddressPtr> &in
                                               const std::vector<AddressPtr> &outputs) {
  T *input_x = reinterpret_cast<T *>(inputs[0]->addr);
  T *input_y = reinterpret_cast<T *>(inputs[1]->addr);
-  T *weight = reinterpret_cast<T *>(inputs[2]->addr);
+  T *weight = nullptr;
+  if (weight_defined_) {
+    weight = reinterpret_cast<T *>(inputs[2]->addr);
+  }
  T *loss = reinterpret_cast<T *>(outputs[0]->addr);
  std::vector<T> tmp_loss(input_size_);

  T epsilon = static_cast<T>(1e-12);
  T one = static_cast<T>(1);
-  if (reduction_ == 0) {
+  if (reduction_ == 0 && weight_defined_) {
    for (size_t i = 0; i < input_size_; i++) {
      T value =
        -weight[i] * (input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
      loss[i] = value;
    }
-  } else {
+  } else if (reduction_ == 0 && (!weight_defined_)) {
+    for (size_t i = 0; i < input_size_; i++) {
+      T value = -(input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
+      loss[i] = value;
+    }
+  } else if ((reduction_ != 0) && weight_defined_) {
    for (size_t i = 0; i < input_size_; i++) {
      T value =
        -weight[i] * (input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
      tmp_loss[i] = value;
    }
+  } else {
+    for (size_t i = 0; i < input_size_; i++) {
+      T value = -(input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
+      tmp_loss[i] = value;
+    }
  }

  if (reduction_ != 0) {
@ -93,7 +108,8 @@ void BinaryCrossEntropyCpuKernel::InitKernel(const CNodePtr &kernel_node) {
  } else if (reduction == "sum") {
    reduction_ = 2;
  }
-
+  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+  weight_defined_ = (input_num == kBceInputNumWithWeight);
  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
 }
 }  // namespace kernel
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_cpu_kernel.h
@ -25,7 +25,7 @@ namespace mindspore {
 namespace kernel {
 class BinaryCrossEntropyCpuKernel : public CPUKernel {
 public:
-  BinaryCrossEntropyCpuKernel() : input_size_(1), reduction_(1) {}
+  BinaryCrossEntropyCpuKernel() : input_size_(1), reduction_(1), weight_defined_(false) {}
  ~BinaryCrossEntropyCpuKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;
@ -42,6 +42,7 @@ class BinaryCrossEntropyCpuKernel : public CPUKernel {
  TypeId dtype_{kTypeUnknown};
  size_t input_size_;
  int reduction_;
+  bool weight_defined_;  // true: there are 3 inputs, false: there are 2 inputs(no [weight])
 };
 MS_REG_CPU_KERNEL(BinaryCrossEntropy,
                  KernelAttr()
@ -57,6 +58,14 @@ MS_REG_CPU_KERNEL(BinaryCrossEntropy,
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32),
                  BinaryCrossEntropyCpuKernel);
+MS_REG_CPU_KERNEL(
+  BinaryCrossEntropy,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+  BinaryCrossEntropyCpuKernel);
+MS_REG_CPU_KERNEL(
+  BinaryCrossEntropy,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  BinaryCrossEntropyCpuKernel);
 }  // namespace kernel
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NN_BINARY_CROSS_ENTROPY_KERNEL_H
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_grad_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_grad_kernel.cc
@ -17,33 +17,55 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kBceGradInputNumWithWeight = 4;
+
 template <typename T>
 void BinaryCrossEntropyGradCpuKernel::Launchkernel(const std::vector<AddressPtr> &inputs,
                                                   const std::vector<AddressPtr> &outputs) {
  T *input_x = reinterpret_cast<T *>(inputs[0]->addr);
  T *input_y = reinterpret_cast<T *>(inputs[1]->addr);
  T *dloss = reinterpret_cast<T *>(inputs[2]->addr);
-  T *weight = reinterpret_cast<T *>(inputs[3]->addr);
+  T *weight = nullptr;
+  if (weight_defined_) {
+    weight = reinterpret_cast<T *>(inputs[3]->addr);
+  }
+
  T *dx = reinterpret_cast<T *>(outputs[0]->addr);

  T epsilon = static_cast<T>(1e-12);
  T one = static_cast<T>(1);
  if (reduction_ == 0) {
+    if (weight_defined_) {
      for (size_t i = 0; i < input_size_; i++) {
        T denominator = ((input_x[i] * (one - input_x[i])) > epsilon) ? (input_x[i] * (one - input_x[i])) : epsilon;
        T value = weight[i] * (input_x[i] - input_y[i]) / denominator;
        dx[i] = value * dloss[i];
      }
+    } else {
+      for (size_t i = 0; i < input_size_; i++) {
+        T denominator = ((input_x[i] * (one - input_x[i])) > epsilon) ? (input_x[i] * (one - input_x[i])) : epsilon;
+        T value = (input_x[i] - input_y[i]) / denominator;
+        dx[i] = value * dloss[i];
+      }
+    }
  } else {
    T dloss1 = dloss[0];
    if (reduction_ == 1) {
      dloss1 = dloss[0] / static_cast<T>(input_size_);
    }
+    if (weight_defined_) {
      for (size_t i = 0; i < input_size_; i++) {
        T denominator = ((input_x[i] * (one - input_x[i])) > epsilon) ? (input_x[i] * (one - input_x[i])) : epsilon;
        T value = weight[i] * (input_x[i] - input_y[i]) / denominator;
        dx[i] = value * dloss1;
      }
+    } else {
+      for (size_t i = 0; i < input_size_; i++) {
+        T denominator = ((input_x[i] * (one - input_x[i])) > epsilon) ? (input_x[i] * (one - input_x[i])) : epsilon;
+        T value = (input_x[i] - input_y[i]) / denominator;
+        dx[i] = value * dloss1;
+      }
+    }
  }
 }

@ -72,6 +94,8 @@ void BinaryCrossEntropyGradCpuKernel::InitKernel(const CNodePtr &kernel_node) {
    reduction_ = 2;
  }

+  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+  weight_defined_ = (input_num == kBceGradInputNumWithWeight);
  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
 }
 }  // namespace kernel
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_grad_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/binary_cross_entropy_grad_kernel.h
@ -25,7 +25,7 @@ namespace mindspore {
 namespace kernel {
 class BinaryCrossEntropyGradCpuKernel : public CPUKernel {
 public:
-  BinaryCrossEntropyGradCpuKernel() : input_size_(1), reduction_(1) {}
+  BinaryCrossEntropyGradCpuKernel() : input_size_(1), reduction_(1), weight_defined_(false) {}
  ~BinaryCrossEntropyGradCpuKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;
@ -39,6 +39,7 @@ class BinaryCrossEntropyGradCpuKernel : public CPUKernel {
  TypeId dtype_{kTypeUnknown};
  size_t input_size_;
  int reduction_;
+  bool weight_defined_;  // true: there are 4 inputs, false: there are 3 inputs(no [weight])
 };
 MS_REG_CPU_KERNEL(BinaryCrossEntropyGrad,
                  KernelAttr()
@ -56,6 +57,20 @@ MS_REG_CPU_KERNEL(BinaryCrossEntropyGrad,
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32),
                  BinaryCrossEntropyGradCpuKernel);
+MS_REG_CPU_KERNEL(BinaryCrossEntropyGrad,
+                  KernelAttr()
+                    .AddInputAttr(kNumberTypeFloat16)
+                    .AddInputAttr(kNumberTypeFloat16)
+                    .AddInputAttr(kNumberTypeFloat16)
+                    .AddOutputAttr(kNumberTypeFloat16),
+                  BinaryCrossEntropyGradCpuKernel);
+MS_REG_CPU_KERNEL(BinaryCrossEntropyGrad,
+                  KernelAttr()
+                    .AddInputAttr(kNumberTypeFloat32)
+                    .AddInputAttr(kNumberTypeFloat32)
+                    .AddInputAttr(kNumberTypeFloat32)
+                    .AddOutputAttr(kNumberTypeFloat32),
+                  BinaryCrossEntropyGradCpuKernel);
 }  // namespace kernel
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NN_BINARY_CROSS_ENTROPY_GRAD_KERNEL_H
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/loss_with_reduction_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/loss_with_reduction_impl.cu
@ -124,18 +124,28 @@ __global__ void BinaryCrossEntropyLossKernel(const int input_size, const int red
                                             const T *input_y, const T *weight, T *loss, T *tmp_loss) {
  T epsilon = 1e-12;
  T one = static_cast<T>(1);
-  if (reduction == 0) {
+  if (reduction == 0 && weight != nullptr) {
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
      T value =
        -weight[i] * (input_y[i] * logT(input_x[i] + epsilon) + (one - input_y[i]) * logT(one - input_x[i] + epsilon));
      loss[i] = value;
    }
-  } else {
+  } else if (reduction == 0 && weight == nullptr) {
+    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
+      T value = -(input_y[i] * logT(input_x[i] + epsilon) + (one - input_y[i]) * logT(one - input_x[i] + epsilon));
+      loss[i] = value;
+    }
+  } else if (reduction != 0 && weight != nullptr) {
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
      T value =
        -weight[i] * (input_y[i] * logT(input_x[i] + epsilon) + (one - input_y[i]) * logT(one - input_x[i] + epsilon));
      tmp_loss[i] = value;
    }
+  } else {
+    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
+      T value = -(input_y[i] * logT(input_x[i] + epsilon) + (one - input_y[i]) * logT(one - input_x[i] + epsilon));
+      tmp_loss[i] = value;
+    }
  }
 }

@ -165,21 +175,37 @@ __global__ void BinaryCrossEntropyLossGradKernel(const int input_size, const int
  T epsilon = 1e-12;
  T one = static_cast<T>(1);
  if (reduction == 0) {
+    if (weight != nullptr) {
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
        T denominator = maxT(input_x[i] * (one - input_x[i]), epsilon);
        T value = weight[i] * (input_x[i] - input_y[i]) / denominator;
        dx[i] = value * dloss[i];
      }
+    } else {
+      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
+        T denominator = maxT(input_x[i] * (one - input_x[i]), epsilon);
+        T value = (input_x[i] - input_y[i]) / denominator;
+        dx[i] = value * dloss[i];
+      }
+    }
  } else {
    T dloss1 = dloss[0];
    if (reduction == 1) {
      dloss1 = dloss[0] / castT(dloss[0], input_size);
    }
+    if (weight != nullptr) {
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
        T denominator = maxT(input_x[i] * (one - input_x[i]), epsilon);
        T value = weight[i] * (input_x[i] - input_y[i]) / denominator;
        dx[i] = value * dloss1;
      }
+    } else {
+      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) {
+        T denominator = maxT(input_x[i] * (one - input_x[i]), epsilon);
+        T value = (input_x[i] - input_y[i]) / denominator;
+        dx[i] = value * dloss1;
+      }
+    }
  }
 }

--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_gpu_kernel.cc
@ -31,5 +31,13 @@ MS_REG_GPU_KERNEL_ONE(BinaryCrossEntropy,
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      BinaryCrossEntropyGpuKernel, half)
+MS_REG_GPU_KERNEL_ONE(
+  BinaryCrossEntropy,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  BinaryCrossEntropyGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(
+  BinaryCrossEntropy,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+  BinaryCrossEntropyGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_gpu_kernel.h
@ -28,7 +28,7 @@ namespace kernel {
 template <typename T>
 class BinaryCrossEntropyGpuKernel : public GpuKernel {
 public:
-  BinaryCrossEntropyGpuKernel() : input_size_(1), reduction_(1) {}
+  BinaryCrossEntropyGpuKernel() : weight_defined_(false), input_size_(1), reduction_(1) {}
  ~BinaryCrossEntropyGpuKernel() override = default;
  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -37,7 +37,10 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    T *input_x = GetDeviceAddress<T>(inputs, 0);
    T *input_y = GetDeviceAddress<T>(inputs, 1);
-    T *weight = GetDeviceAddress<T>(inputs, 2);
+    T *weight = nullptr;
+    if (weight_defined_) {
+      weight = GetDeviceAddress<T>(inputs, 2);
+    }
    T *loss = GetDeviceAddress<T>(outputs, 0);
    T *tmp_loss = GetDeviceAddress<T>(workspace, 0);
    if (input_size_ > 0) {
@ -49,6 +52,8 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {

  bool Init(const CNodePtr &kernel_node) override {
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+    weight_defined_ = (input_num == 3);
    for (size_t i = 0; i < input_shape.size(); i++) {
      input_size_ *= input_shape[i];
    }
@ -70,7 +75,9 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
  void InitSizeLists() override {
    input_size_list_.push_back(input_size_ * sizeof(T));
    input_size_list_.push_back(input_size_ * sizeof(T));
+    if (weight_defined_) {
      input_size_list_.push_back(input_size_ * sizeof(T));
+    }
    if (reduction_ == 0) {
      output_size_list_.push_back(input_size_ * sizeof(T));
    } else {
@ -80,6 +87,7 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
  }

 private:
+  bool weight_defined_;  // true: there are 3 inputs, false: there are 2 inputs(no [weight])
  size_t input_size_;
  int reduction_;
  size_t workspace_size_;
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_grad_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_grad_kernel.cc
@ -34,5 +34,19 @@ MS_REG_GPU_KERNEL_ONE(BinaryCrossEntropyGrad,
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      BinaryCrossEntropyGradGpuKernel, half)
+MS_REG_GPU_KERNEL_ONE(BinaryCrossEntropyGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      BinaryCrossEntropyGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(BinaryCrossEntropyGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat16)
+                        .AddInputAttr(kNumberTypeFloat16)
+                        .AddInputAttr(kNumberTypeFloat16)
+                        .AddOutputAttr(kNumberTypeFloat16),
+                      BinaryCrossEntropyGradGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_grad_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/binary_cross_entropy_grad_kernel.h
@ -28,7 +28,7 @@ namespace kernel {
 template <typename T>
 class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
 public:
-  BinaryCrossEntropyGradGpuKernel() : input_size_(1), reduction_(1) {}
+  BinaryCrossEntropyGradGpuKernel() : input_size_(1), reduction_(1), weight_defined_(false) {}
  ~BinaryCrossEntropyGradGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -40,7 +40,10 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
    T *input_x = GetDeviceAddress<T>(inputs, 0);
    T *input_y = GetDeviceAddress<T>(inputs, 1);
    T *dloss = GetDeviceAddress<T>(inputs, 2);
-    T *weight = GetDeviceAddress<T>(inputs, 3);
+    T *weight = nullptr;
+    if (weight_defined_) {
+      weight = GetDeviceAddress<T>(inputs, 3);
+    }
    T *dx = GetDeviceAddress<T>(outputs, 0);
    if (input_size_ > 0) {
      BinaryCrossEntropyLossGrad(input_size_, reduction_, input_x, input_y, weight, dloss, dx,
@ -51,6 +54,8 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {

  bool Init(const CNodePtr &kernel_node) override {
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+    weight_defined_ = (input_num == 4);
    for (size_t i = 0; i < input_shape.size(); i++) {
      input_size_ *= input_shape[i];
    }
@ -73,14 +78,16 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
    } else {
      input_size_list_.push_back(sizeof(T));
    }
+    if (weight_defined_) {
      input_size_list_.push_back(input_size_ * sizeof(T));
+    }
    output_size_list_.push_back(input_size_ * sizeof(T));
  }

 private:
  size_t input_size_;
  int reduction_;
-
+  bool weight_defined_;  // true: there are 4 inputs, false: there are 3 inputs(no [weight])
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
--- a/tests/st/ops/cpu/test_binary_cross_entropy_op.py
+++ b/tests/st/ops/cpu/test_binary_cross_entropy_op.py
@ -24,12 +24,13 @@ from mindspore.ops import operations as P

 context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

+
 class Net(nn.Cell):
    def __init__(self, reduction="none"):
        super(Net, self).__init__()
        self.BinaryCrossEntropy = P.BinaryCrossEntropy(reduction)

-    def construct(self, x, y, weight):
+    def construct(self, x, y, weight=None):
        return self.BinaryCrossEntropy(x, y, weight)


@ -50,6 +51,7 @@ def test_binary_cross_entropy_loss():
              0.03405444, 0.23934692]
    assert np.allclose(loss.asnumpy(), expect)

+
 def test_binary_cross_entropy_loss_mean():
    np.random.seed(42)
    prediction = np.random.rand(20).astype(np.float32)
@ -61,6 +63,7 @@ def test_binary_cross_entropy_loss_mean():
    expect = [0.7447324991226196]
    assert loss.asnumpy() == expect

+
 def test_binary_cross_entropy_loss_sum():
    np.random.seed(42)
    prediction = np.random.rand(20).astype(np.float32)
@ -72,6 +75,18 @@ def test_binary_cross_entropy_loss_sum():
    expect = [14.894649505615234]
    assert loss.asnumpy() == expect

+
+def test_binary_cross_entropy_loss_sum_without_weight():
+    np.random.seed(42)
+    prediction = np.random.rand(20).astype(np.float32)
+    target = np.random.rand(20).astype(np.float32)
+    reduction = "sum"
+    net = Net(reduction)
+    loss = net(Tensor(prediction), Tensor(target))
+    expect = [25.48195216753522]
+    assert np.allclose(loss.asnumpy(), expect)
+
+
 def test_binary_cross_entropy_loss_16():
    np.random.seed(42)
    prediction = np.random.rand(20).astype(np.float16)
@ -86,6 +101,7 @@ def test_binary_cross_entropy_loss_16():
              0.0340576, 0.239258]
    assert np.allclose(loss.asnumpy(), expect)

+
 def test_binary_cross_entropy_loss_mean_16():
    np.random.seed(42)
    prediction = np.random.rand(20).astype(np.float16)
@ -97,6 +113,7 @@ def test_binary_cross_entropy_loss_mean_16():
    expect = [0.74462890625]
    assert loss.asnumpy() == expect

+
 def test_binary_cross_entropy_loss_sum_16():
    np.random.seed(42)
    prediction = np.random.rand(20).astype(np.float16)
@ -108,13 +125,14 @@ def test_binary_cross_entropy_loss_sum_16():
    expect = [14.890625]
    assert loss.asnumpy() == expect

+
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network

-    def construct(self, x1, x2, sens, weight):
+    def construct(self, x1, x2, sens, weight=None):
        gout = self.grad(self.network)(x1, x2, sens, weight)
        return gout

--- a/tests/st/ops/gpu/test_binary_cross_entropy_op.py
+++ b/tests/st/ops/gpu/test_binary_cross_entropy_op.py
@ -28,9 +28,9 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 class Net(nn.Cell):
    def __init__(self, reduction="none"):
        super(Net, self).__init__()
-        self.BinaryCrossEntropy = P.BinaryCrossEntropy("none")
+        self.BinaryCrossEntropy = P.BinaryCrossEntropy(reduction)

-    def construct(self, x, y, weight):
+    def construct(self, x, y, weight=None):
        return self.BinaryCrossEntropy(x, y, weight)


@ -51,13 +51,24 @@ def test_binary_cross_entropy_loss():
    assert np.allclose(loss.asnumpy(), expect)


+def test_binary_cross_entropy_loss_sum_without_weight():
+    np.random.seed(42)
+    prediction = np.random.rand(20).astype(np.float32)
+    target = np.random.rand(20).astype(np.float32)
+    reduction = "sum"
+    net = Net(reduction)
+    loss = net(Tensor(prediction), Tensor(target))
+    expect = [25.48195216753522]
+    assert np.allclose(loss.asnumpy(), expect)
+
+
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network

-    def construct(self, x1, x2, sens, weight):
+    def construct(self, x1, x2, sens, weight=None):
        gout = self.grad(self.network)(x1, x2, sens, weight)
        return gout