Add RGB2GRAY operation

2021-03-09 10:43:37 +08:00 · 2021-03-09 10:43:37 +08:00 · 7b56d1772e
parent c69142fdc1
commit 7b56d1772e
18 changed files with 280 additions and 2 deletions
--- a/mindspore/ccsrc/minddata/dataset/api/vision.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc
@ -134,6 +134,9 @@ std::shared_ptr<TensorOperation> CenterCrop::Parse(const MapTargetDevice &env) {
  return std::make_shared<CenterCropOperation>(data_->size_);
 }

+// RGB2GRAY Transform Operation.
+std::shared_ptr<TensorOperation> RGB2GRAY::Parse() { return std::make_shared<RgbToGrayOperation>(); }
+
 // Crop Transform Operation.
 struct Crop::Data {
  Data(const std::vector<int32_t> &coordinates, const std::vector<int32_t> &size)
--- a/mindspore/ccsrc/minddata/dataset/include/vision_lite.h
+++ b/mindspore/ccsrc/minddata/dataset/include/vision_lite.h
@ -91,6 +91,22 @@ class CenterCrop : public TensorTransform {
  std::shared_ptr<Data> data_;
 };

+/// \brief RGB2GRAY TensorTransform.
+/// \notes Convert RGB image or color image to grayscale image
+class RGB2GRAY : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  RGB2GRAY() = default;
+
+  /// \brief Destructor.
+  ~RGB2GRAY() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+};
+
 /// \brief Crop TensorTransform.
 /// \notes Crop an image based on location and crop size
 class Crop : public TensorTransform {
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
@ -44,6 +44,7 @@ add_library(kernels-image OBJECT
    random_sharpness_op.cc
    rescale_op.cc
    resize_op.cc
+    rgb_to_gray_op.cc
    rgba_to_bgr_op.cc
    rgba_to_rgb_op.cc
    sharpness_op.cc
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
@ -1096,6 +1096,23 @@ Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
  }
 }

+Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  try {
+    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
+    if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) {
+      RETURN_STATUS_UNEXPECTED("RgbToGray: image shape is not <H,W,C> or channel is not 3.");
+    }
+    TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1]});
+    std::shared_ptr<CVTensor> output_cv;
+    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
+    cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2GRAY));
+    *output = std::static_pointer_cast<Tensor>(output_cv);
+    return Status::OK();
+  } catch (const cv::Exception &e) {
+    RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
+  }
+}
+
 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
  struct jpeg_decompress_struct cinfo {};
  struct JpegErrorManagerCustom jerr {};
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
@ -293,6 +293,12 @@ Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
 /// \return Status code
 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

+/// \brief Take in a 3 channel image in RBG to GRAY
+/// \param[in] input The input image
+/// \param[out] output The output image
+/// \return Status code
+Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
+
 /// \brief Get jpeg image width and height
 /// \param input: CVTensor containing the not decoded image 1D bytes
 /// \param img_width: the jpeg image width
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
@ -1672,5 +1672,25 @@ bool GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_poi
  return true;
 }

+bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat) {
+  if (data_type == LDataType::UINT8) {
+    if (mat.IsEmpty()) {
+      mat.Init(w, h, 1, LDataType::UINT8);
+    }
+    unsigned char *ptr = mat;
+    const unsigned char *data_ptr = src;
+    for (int y = 0; y < h; y++) {
+      for (int x = 0; x < w; x++) {
+        *ptr = (data_ptr[2] * B2GRAY + data_ptr[1] * G2GRAY + data_ptr[0] * R2GRAY + GRAYSHIFT_DELTA) >> GRAYSHIFT;
+        ptr++;
+        data_ptr += 3;
+      }
+    }
+  } else {
+    return false;
+  }
+  return true;
+}
+
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h
@ -137,6 +137,9 @@ bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMa
 /// \brief Filter the image by a Sobel kernel
 bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize, PaddBorderType pad_type);

+/// \brief Convert RGB image or color image to grayscale image
+bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat);
+
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // IMAGE_PROCESS_H_
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
@ -421,6 +421,40 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
  return Status::OK();
 }

+Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  if (input->Rank() != 3) {
+    RETURN_STATUS_UNEXPECTED("RgbToGray: input image is not in shape of <H,W,C>");
+  }
+  if (input->type() != DataType::DE_UINT8) {
+    RETURN_STATUS_UNEXPECTED("RgbToGray: image datatype is not uint8.");
+  }
+
+  try {
+    int output_height = input->shape()[0];
+    int output_width = input->shape()[1];
+
+    LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2],
+                         const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
+                         GetLiteCVDataType(input->type()));
+    LiteMat lite_mat_convert;
+    std::shared_ptr<Tensor> output_tensor;
+    TensorShape new_shape = TensorShape({output_height, output_width, 1});
+    RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
+    uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
+    lite_mat_convert.Init(output_width, output_height, 1, reinterpret_cast<void *>(buffer),
+                          GetLiteCVDataType(input->type()));
+
+    bool ret =
+      ConvertRgbToGray(lite_mat_rgb, GetLiteCVDataType(input->type()), output_width, output_height, lite_mat_convert);
+    CHECK_FAIL_RETURN_UNEXPECTED(ret, "RgbToGray: RGBToGRAY failed.");
+
+    *output = output_tensor;
+  } catch (std::runtime_error &e) {
+    RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
+  }
+  return Status::OK();
+}
+
 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
           const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
           uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h
@ -95,6 +95,12 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
              int32_t output_width, double fx = 0.0, double fy = 0.0,
              InterpolationMode mode = InterpolationMode::kLinear);

+/// \brief Take in a 3 channel image in RBG to GRAY
+/// \param[in] input The input image
+/// \param[out] output The output image
+/// \return Status code
+Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
+
 /// \brief Pads the input image and puts the padded image in the output
 /// \param[in] input: input Tensor
 /// \param[out] output: padded Tensor
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_gray_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_gray_op.cc
@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "minddata/dataset/kernels/image/rgb_to_gray_op.h"
+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/kernels/image/image_utils.h"
+#else
+#include "minddata/dataset/kernels/image/lite_image_utils.h"
+#endif
+
+namespace mindspore {
+namespace dataset {
+
+Status RgbToGrayOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  IO_CHECK(input, output);
+  return RgbToGray(input, output);
+}
+
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_gray_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_gray_op.h
@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_RGB_TO_GRAY_OP_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_RGB_TO_GRAY_OP_H_
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/kernels/tensor_op.h"
+#include "minddata/dataset/util/status.h"
+
+namespace mindspore {
+namespace dataset {
+class RgbToGrayOp : public TensorOp {
+ public:
+  RgbToGrayOp() = default;
+
+  ~RgbToGrayOp() override = default;
+
+  Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
+
+  std::string Name() const override { return kRgbToGrayOp; }
+};
+}  // namespace dataset
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_RGB_TO_GRAY_OP_H_
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc
@ -72,6 +72,7 @@
 #include "minddata/dataset/kernels/image/rgba_to_bgr_op.h"
 #include "minddata/dataset/kernels/image/rgba_to_rgb_op.h"
 #endif
+#include "minddata/dataset/kernels/image/rgb_to_gray_op.h"
 #include "minddata/dataset/kernels/image/rotate_op.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h"
@ -232,6 +233,11 @@ Status CenterCropOperation::to_json(nlohmann::json *out_json) {
  return Status::OK();
 }

+// RGB2GRAYOperation
+Status RgbToGrayOperation::ValidateParams() { return Status::OK(); }
+
+std::shared_ptr<TensorOp> RgbToGrayOperation::Build() { return std::make_shared<RgbToGrayOp>(); }
+
 // CropOperation.
 CropOperation::CropOperation(std::vector<int32_t> coordinates, std::vector<int32_t> size)
    : coordinates_(coordinates), size_(size) {}
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h
@ -74,6 +74,7 @@ constexpr char kResizeOperation[] = "Resize";
 constexpr char kResizeWithBBoxOperation[] = "ResizeWithBBox";
 constexpr char kRgbaToBgrOperation[] = "RgbaToBgr";
 constexpr char kRgbaToRgbOperation[] = "RgbaToRgb";
+constexpr char kRgbToGrayOperation[] = "RgbToGray";
 constexpr char kRotateOperation[] = "Rotate";
 constexpr char kSoftDvppDecodeRandomCropResizeJpegOperation[] = "SoftDvppDecodeRandomCropResizeJpeg";
 constexpr char kSoftDvppDecodeResizeJpegOperation[] = "SoftDvppDecodeResizeJpeg";
@ -163,6 +164,19 @@ class CenterCropOperation : public TensorOperation {
  std::vector<int32_t> size_;
 };

+class RgbToGrayOperation : public TensorOperation {
+ public:
+  RgbToGrayOperation() = default;
+
+  ~RgbToGrayOperation() = default;
+
+  std::shared_ptr<TensorOp> Build() override;
+
+  Status ValidateParams() override;
+
+  std::string Name() const override { return kRgbToGrayOperation; }
+};
+
 class CropOperation : public TensorOperation {
 public:
  CropOperation(std::vector<int32_t> coordinates, std::vector<int32_t> size);
--- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
@ -97,6 +97,7 @@ constexpr char kResizeOp[] = "ResizeOp";
 constexpr char kResizeWithBBoxOp[] = "ResizeWithBBoxOp";
 constexpr char kRgbaToBgrOp[] = "RgbaToBgrOp";
 constexpr char kRgbaToRgbOp[] = "RgbaToRgbOp";
+constexpr char kRgbToGrayOp[] = "RgbToGrayOp";
 constexpr char kSharpnessOp[] = "SharpnessOp";
 constexpr char kSolarizeOp[] = "SolarizeOp";
 constexpr char kSwapRedBlueOp[] = "SwapRedBlueOp";
--- a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h
+++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h
@ -88,6 +88,22 @@ class CenterCrop : public TensorTransform {
  std::shared_ptr<Data> data_;
 };

+/// \brief RGB2GRAY TensorTransform.
+/// \notes Convert RGB image or color image to grayscale image
+class RGB2GRAY : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  RGB2GRAY() = default;
+
+  /// \brief Destructor.
+  ~RGB2GRAY() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+};
+
 /// \brief Crop TensorTransform.
 /// \notes Crop an image based on location and crop size
 class Crop : public TensorTransform {
--- a/mindspore/lite/minddata/CMakeLists.txt
+++ b/mindspore/lite/minddata/CMakeLists.txt
@ -191,13 +191,14 @@ if(BUILD_MINDDATA STREQUAL "full")
        ${MINDDATA_DIR}/util/cond_var.cc
        ${MINDDATA_DIR}/engine/data_schema.cc
        ${MINDDATA_DIR}/kernels/tensor_op.cc
+        ${MINDDATA_DIR}/kernels/image/affine_op.cc
        ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc
        ${MINDDATA_DIR}/kernels/image/center_crop_op.cc
        ${MINDDATA_DIR}/kernels/image/crop_op.cc
        ${MINDDATA_DIR}/kernels/image/decode_op.cc
        ${MINDDATA_DIR}/kernels/image/normalize_op.cc
-        ${MINDDATA_DIR}/kernels/image/affine_op.cc
        ${MINDDATA_DIR}/kernels/image/resize_op.cc
+        ${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc
        ${MINDDATA_DIR}/kernels/image/rotate_op.cc
        ${MINDDATA_DIR}/kernels/image/random_affine_op.cc
        ${MINDDATA_DIR}/kernels/image/math_utils.cc
@ -279,6 +280,7 @@ elseif(BUILD_MINDDATA STREQUAL "wrapper")
            ${MINDDATA_DIR}/kernels/image/crop_op.cc
            ${MINDDATA_DIR}/kernels/image/normalize_op.cc
            ${MINDDATA_DIR}/kernels/image/resize_op.cc
+            ${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc
            ${MINDDATA_DIR}/kernels/image/rotate_op.cc
            ${MINDDATA_DIR}/kernels/data/compose_op.cc
            ${MINDDATA_DIR}/kernels/data/duplicate_op.cc
@ -377,6 +379,7 @@ elseif(BUILD_MINDDATA STREQUAL "lite")
        "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc"
        "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc"
        "${MINDDATA_DIR}/kernels/image/rescale_op.cc"
+        "${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc"
        "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc"
        "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc"
        "${MINDDATA_DIR}/kernels/image/sharpness_op.cc"
--- a/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc
+++ b/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc
@ -195,3 +195,39 @@ TEST_F(MindDataTestPipeline, TestResizeWithBBoxSuccess) {
  // Manually terminate the pipeline
  iter->Stop();
 }
+
+TEST_F(MindDataTestPipeline, TestRGB2GRAYSucess) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRGB2GRAYSucess.";
+  // Create an ImageFolder Dataset
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(0, 1));
+  EXPECT_NE(ds, nullptr);
+
+  // Create objects for the tensor ops
+  std::shared_ptr<TensorTransform> convert(new mindspore::dataset::vision::RGB2GRAY());
+
+  ds = ds->Map({convert});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  iter->GetNextRow(&row);
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    i++;
+    auto image = row["image"];
+    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
+    iter->GetNextRow(&row);
+  }
+
+  EXPECT_EQ(i, 1);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
--- a/tests/ut/cpp/dataset/image_process_test.cc
+++ b/tests/ut/cpp/dataset/image_process_test.cc
@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
- 
+
 #include "common/common.h"
 #include "lite_cv/lite_mat.h"
 #include "lite_cv/image_process.h"
@ -1714,3 +1714,25 @@ TEST_F(MindDataImageProcess, TestSobelFlag) {
  distance_x = sqrt(distance_x / total_size);
  EXPECT_EQ(distance_x, 0.0f);
 }
+
+TEST_F(MindDataImageProcess, testConvertRgbToGray) {
+  std::string filename = "data/dataset/apple.jpg";
+  cv::Mat image = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
+  cv::Mat rgb_mat;
+  cv::Mat rgb_mat1;
+
+  cv::cvtColor(image, rgb_mat, CV_BGR2GRAY);
+  cv::imwrite("./opencv_image.jpg", rgb_mat);
+
+  cv::cvtColor(image, rgb_mat1, CV_BGR2RGB);
+
+  LiteMat lite_mat_rgb;
+  lite_mat_rgb.Init(rgb_mat1.cols, rgb_mat1.rows, rgb_mat1.channels(), rgb_mat1.data, LDataType::UINT8);
+  LiteMat lite_mat_gray;
+  bool ret = ConvertRgbToGray(lite_mat_rgb, LDataType::UINT8, image.cols, image.rows, lite_mat_gray);
+  ASSERT_TRUE(ret == true);
+
+  cv::Mat dst_image(lite_mat_gray.height_, lite_mat_gray.width_, CV_8UC1, lite_mat_gray.data_ptr_);
+  cv::imwrite("./mindspore_image.jpg", dst_image);
+  CompareMat(rgb_mat, lite_mat_gray);
+}