From f8e89cf9f827d4748dab139d0b0975c929d608c8 Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Tue, 6 Jul 2021 20:22:25 +0800 Subject: [PATCH] add parameter of lite --- .../kernels/image/lite_cv/image_process.h | 185 +++++++++++++++--- .../dataset/kernels/image/lite_cv/lite_mat.h | 17 +- 2 files changed, 176 insertions(+), 26 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h index 3601cb54d70..a3e3a648285 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h @@ -65,99 +65,238 @@ struct BoxesConfig { }; /// \brief resizing image by bilinear algorithm, the data type of currently only supports is uint8, -/// the channel of currently supports is 3 and 1 +/// the channel of currently supports is 3 and 1. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] dst_w The width of the output image. +/// \param[in] dst_h The height of the output image. bool ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h); -/// \brief Init Lite Mat from pixel, the conversion of currently supports is rbgaTorgb and rgbaTobgr +/// \brief Init LiteMat from pixel data, the currently supported conversions are rgbaTorgb and rgbaTobgr. +/// \note The length of the data buffer must equal the product of w and h. +/// \param[in] data Input image data. +/// \param[in] pixel_type The pixel type (LPixelType). +/// \param[in] data_type The data type (LDataType). +/// \param[in] w The width of the output image. +/// \param[in] h The height of the output image. +/// \param[out] m Used to store image data. 
bool InitFromPixel(const unsigned char *data, LPixelType pixel_type, LDataType data_type, int w, int h, LiteMat &m); -/// \brief convert the data type, the conversion of currently supports is uint8 to float +/// \brief convert the data type, the currently supported conversion is uint8 to float. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] scale Scale pixel value (default: 1.0). bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale = 1.0); -/// \brief crop image, the channel supports is 3 and 1 +/// \brief crop image, the supported channel counts are 3 and 1. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] x The x coordinate value of the starting point of the crop region. +/// \param[in] y The y coordinate value of the starting point of the crop region. +/// \param[in] w The width of the crop region. +/// \param[in] h The height of the crop region. bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h); -/// \brief normalize image, currently the supports data type is float +/// \brief normalize image, the currently supported data type is float. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] mean Mean of the data set. +/// \param[in] std Norm of the data set. bool SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector &mean, const std::vector &std); -/// \brief padd image, the channel supports is 3 and 1 +/// \brief pad image, the supported channel counts are 3 and 1. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] top The padding length on the top. +/// \param[in] bottom The padding length on the bottom. +/// \param[in] left The padding length on the left. +/// \param[in] right The padding length on the right. +/// \param[in] pad_type The type of pad. +/// \param[in] fill_b_or_gray Fill value for the B or GRAY channel (default: 0). +/// \param[in] fill_g Fill value for the G channel (default: 0). +/// \param[in] fill_r Fill value for the R channel (default: 0). 
bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, PaddBorderType pad_type, uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0); -/// \brief Extract image channel by index +/// \brief Extract image channel by index. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] col The index of the channel to extract. bool ExtractChannel(LiteMat &src, LiteMat &dst, int col); -/// \brief Split image channels to single channel +/// \brief Split image channels to single channel. +/// \param[in] src Input image data. +/// \param[out] mv Single channel data. bool Split(const LiteMat &src, std::vector &mv); /// \brief Create a multi-channel image out of several single-channel arrays. +/// \param[in] mv Single channel data. +/// \param[out] dst Output image data. bool Merge(const std::vector &mv, LiteMat &dst); -/// \brief Apply affine transformation for 1 channel image +/// \brief Apply affine transformation for 1 channel image. +/// \param[in] src Input image data. +/// \param[out] out_img Output image data. +/// \param[in] M Affine transformation matrix (array of 6 elements). +/// \param[in] dsize The size of the output image. +/// \param[in] borderValue The pixel value used for filling after the image is captured. bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector dsize, UINT8_C1 borderValue); -/// \brief Apply affine transformation for 3 channel image +/// \brief Apply affine transformation for 3 channel image. +/// \param[in] src Input image data. +/// \param[out] out_img Output image data. +/// \param[in] M Affine transformation matrix (array of 6 elements). +/// \param[in] dsize The size of the output image. +/// \param[in] borderValue The pixel value used for filling after the image is captured. 
bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector dsize, UINT8_C3 borderValue); -/// \brief Get default anchor boxes for Faster R-CNN, SSD, YOLO etc +/// \brief Get default anchor boxes for Faster R-CNN, SSD, YOLO etc. +/// \param[in] config Object of the BoxesConfig structure. std::vector> GetDefaultBoxes(const BoxesConfig config); -/// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w) +/// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w). +/// \param[in,out] boxes Prediction boxes to convert; holds the actual size boxes afterwards. +/// \param[in] default_boxes Default boxes. +/// \param[in] config Object of the BoxesConfig structure. void ConvertBoxes(std::vector> &boxes, const std::vector> &default_boxes, const BoxesConfig config); -/// \brief Apply Non-Maximum Suppression +/// \brief Apply Non-Maximum Suppression. +/// \param[in] all_boxes All input boxes. +/// \param[in] all_scores Score after all boxes are executed through the network. +/// \param[in] thres Threshold of IOU. +/// \param[in] max_boxes Maximum number of output boxes. std::vector ApplyNms(const std::vector> &all_boxes, std::vector &all_scores, float thres, int max_boxes); -/// \brief affine image by linear +/// \brief affine image by linear. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] M Transformation matrix. +/// \param[in] dst_w The width of the output image. +/// \param[in] dst_h The height of the output image. +/// \param[in] borderType Edge processing type. +/// \param[in] borderValue Boundary fill value. bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, PaddBorderType borderType, std::vector &borderValue); -/// \brief perspective image by linear +/// \brief perspective image by linear. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] M Transformation matrix. +/// \param[in] dst_w The width of the output image. 
+/// \param[in] dst_h The height of the output image. +/// \param[in] borderType Edge processing type. +/// \param[in] borderValue Boundary fill value. bool WarpPerspectiveBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, PaddBorderType borderType, std::vector &borderValue); -/// \brief Matrix rotation +/// \brief Matrix rotation. +/// \param[in] x The value of the x-axis of the coordinate rotation point. +/// \param[in] y The value of the y-axis of the coordinate rotation point. +/// \param[in] angle Rotation angle. +/// \param[in] scale Scaling ratio. +/// \param[out] M Output transformation matrix. bool GetRotationMatrix2D(float x, float y, double angle, double scale, LiteMat &M); -/// \brief Perspective transformation +/// \brief Perspective transformation. +/// \param[in] src_point Input coordinate point. +/// \param[in] dst_point Destination coordinate point. +/// \param[out] M Output matrix. bool GetPerspectiveTransform(std::vector src_point, std::vector dst_point, LiteMat &M); -/// \brief Affine transformation +/// \brief Affine transformation. +/// \param[in] src_point Input coordinate point. +/// \param[in] dst_point Destination coordinate point. +/// \param[out] M Output matrix. bool GetAffineTransform(std::vector src_point, std::vector dst_point, LiteMat &M); -/// \brief Matrix transpose +/// \brief Matrix transpose. +/// \param[in] src Input matrix. +/// \param[out] dst Output matrix. bool Transpose(LiteMat &src, LiteMat &dst); /// \brief Filter the image by a Gaussian kernel +/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. +/// \param[out] dst LiteMat image after processing. +/// \param[in] ksize The size of Gaussian kernel. It should be a vector of size 2 as {kernel_x, kernel_y}, both values of +/// which should be positive and odd. +/// \param[in] sigmaX The Gaussian kernel standard deviation of width. It should be a positive value. 
+/// \param[in] sigmaY The Gaussian kernel standard deviation of height (default=0.f). It should be a positive value, +/// otherwise the value of sigmaX is used. +/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). bool GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector &ksize, double sigmaX, double sigmaY = 0.f, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); /// \brief Detect edges in an image +/// \param[in] src LiteMat image to be processed. Only single channel LiteMat of type UINT8 is supported now. +/// \param[out] dst LiteMat image after processing. +/// \param[in] low_thresh The lower bound of the edge. Pixel with value below it will not be considered as a boundary. +/// It should be a nonnegative value. +/// \param[in] high_thresh The higher bound of the edge. Pixel with value over it will +/// be absolutely considered as a boundary. It should be a nonnegative value and no less than low_thresh. +/// \param[in] ksize The size of Sobel kernel (default=3). It can only be 3, 5 or 7. +/// \param[in] L2gradient Whether to use L2 distance while calculating gradient (default=false). bool Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3, bool L2gradient = false); -/// \brief Apply a 2D convolution over the image +/// \brief Apply a 2D convolution over the image. +/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. +/// \param[in] kernel LiteMat 2D convolution kernel. Only LiteMat of type FLOAT32 is supported now. +/// \param[out] dst LiteMat image after processing. +/// \param[in] dst_type Output data type of dst. +/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 
bool Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); /// \brief Applies a separable linear convolution over the image +/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. +/// \param[in] kx LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. +/// \param[in] ky LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. +/// \param[out] dst LiteMat image after processing. +/// \param[in] dst_type Output data type of dst. +/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); /// \brief Filter the image by a Sobel kernel +/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. +/// \param[out] dst LiteMat image after processing. +/// \param[in] flag_x Order of the derivative x. It should be a nonnegative value and cannot be equal to 0 at the same +/// time as flag_y. +/// \param[in] flag_y Order of the derivative y. It should be a nonnegative value and cannot be equal +/// to 0 at the same time as flag_x. +/// \param[in] ksize The size of Sobel kernel (default=3). It can only be 1, 3, 5 or 7. +/// \param[in] scale The scale factor for the computed derivative values (default=1.0). +/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize = 3, double scale = 1.0, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); -/// \brief Convert RGB image or color image to BGR image +/// \brief Convert RGB image or color image to BGR image. +/// \param[in] src Input image data. 
+/// \param[in] data_type The data type (LDataType). +/// \param[in] w The width of output image. +/// \param[in] h The height of output image. +/// \param[out] mat Output image data. bool ConvertRgbToBgr(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat); -/// \brief Convert RGB image or color image to grayscale image +/// \brief Convert RGB image or color image to grayscale image. +/// \param[in] src Input image data. +/// \param[in] data_type The data type (LDataType). +/// \param[in] w The width of output image. +/// \param[in] h The height of output image. +/// \param[out] mat Output image data. bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat); -/// \brief Resize preserve AR with filler +/// \brief Resize preserve AR with filler. +/// \param[in] src Input image data. +/// \param[out] dst Output image data. +/// \param[in] h The height of output image. +/// \param[in] w The width of output image. +/// \param[in] ratioShiftWShiftH Array that records the ratio, width shift, and height shift. +/// \param[in] invM Fixed direction array. +/// \param[in] img_orientation Way of export direction. bool ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3], float (*invM)[2][3], int img_orientation); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h index d2389a350f0..38273e0be76 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h @@ -266,17 +266,28 @@ class LiteMat { } private: - /// \brief apply for memory alignment + /// \brief Apply for memory alignment. + /// \param[in] size The size of the requested memory alignment. void *AlignMalloc(unsigned int size); - /// \brief free memory + /// \brief Free memory. + /// \param[in] ptr Pointer to the memory to free. 
void AlignFree(void *ptr); + /// \brief Initialize the element size of different types of data. + /// \param[in] data_type Type of data. void InitElemSize(LDataType data_type); - /// \brief add reference + /// \brief Add value of reference count. + /// \param[in] p Pointer to the reference count. + /// \param[in] value The value of newly added references. + /// \return The reference count. int addRef(int *p, int value); + /// \brief Set the step size of the pixels in the LiteMat array. + /// \param[in] c0 The number used to set the value of step[0]. + /// \param[in] c1 The number used to set the value of step[1]. + /// \param[in] c2 The number used to set the value of step[2]. void setSteps(int c0, int c1, int c2); public: