forked from mindspore-Ecosystem/mindspore
!16119 Fix minddata issues
From: @luoyang42 Reviewed-by: @jonyguo,@liucunwei Signed-off-by: @jonyguo,@liucunwei
This commit is contained in:
commit
6fcd6cab68
|
@ -366,12 +366,14 @@ install(
|
|||
|
||||
## Public header files for minddata
|
||||
install(
|
||||
FILES ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
|
||||
FILES ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/config.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/text.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_ascend.h
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h
|
||||
DESTINATION ${INSTALL_BASE_DIR}/include/dataset
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
|
|
@ -7,7 +7,6 @@ if(ENABLE_PYTHON)
|
|||
python/bindings/dataset/core/bindings.cc
|
||||
python/bindings/dataset/engine/cache/bindings.cc
|
||||
python/bindings/dataset/engine/datasetops/bindings.cc
|
||||
python/bindings/dataset/engine/datasetops/source/bindings.cc
|
||||
python/bindings/dataset/engine/gnn/bindings.cc
|
||||
python/bindings/dataset/engine/ir/consumer/bindings.cc
|
||||
python/bindings/dataset/engine/ir/datasetops/bindings.cc
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/include/audio.h"
|
||||
#include "minddata/dataset/include/dataset/audio.h"
|
||||
|
||||
#include "minddata/dataset/audio/ir/kernels/audio_ir.h"
|
||||
|
||||
|
|
|
@ -1,171 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/api/python/pybind_register.h"
|
||||
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
|
||||
#include "minddata/dataset/engine/datasetops/dataset_op.h"
|
||||
|
||||
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/clue_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/csv_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/io_block.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/manifest_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/mnist_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/text_file_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Registers the CifarOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(CifarOp, 1, ([](const py::module *m) {
  // Forwards to CifarOp::CountTotalRows and returns the value it writes into the out-parameter.
  auto count_rows = [](const std::string &dir, const std::string &usage, bool isCifar10) {
    int64_t num_rows = 0;
    THROW_IF_ERROR(CifarOp::CountTotalRows(dir, usage, isCifar10, &num_rows));
    return num_rows;
  };

  (void)py::class_<CifarOp, DatasetOp, std::shared_ptr<CifarOp>>(*m, "CifarOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the ClueOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(ClueOp, 1, ([](const py::module *m) {
  // Converts the Python list into file names, then asks ClueOp::CountAllFileRows for the total.
  auto count_rows = [](const py::list &files) {
    std::vector<std::string> filenames;
    for (auto file : files) {
      // A None entry is stored as an empty file name; anything else is converted with py::str.
      if (file.is_none()) {
        (void)filenames.emplace_back("");
      } else {
        filenames.push_back(py::str(file));
      }
    }

    int64_t num_rows = 0;
    THROW_IF_ERROR(ClueOp::CountAllFileRows(filenames, &num_rows));
    return num_rows;
  };

  (void)py::class_<ClueOp, DatasetOp, std::shared_ptr<ClueOp>>(*m, "ClueOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the CsvOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(CsvOp, 1, ([](const py::module *m) {
  // Converts the Python list into file names, then asks CsvOp::CountAllFileRows for the total.
  // csv_header is passed through unchanged.
  auto count_rows = [](const py::list &files, bool csv_header) {
    std::vector<std::string> filenames;
    for (auto file : files) {
      // A None entry is stored as an empty file name; anything else is converted with py::str.
      if (file.is_none()) {
        (void)filenames.emplace_back("");
      } else {
        filenames.push_back(py::str(file));
      }
    }

    int64_t num_rows = 0;
    THROW_IF_ERROR(CsvOp::CountAllFileRows(filenames, csv_header, &num_rows));
    return num_rows;
  };

  (void)py::class_<CsvOp, DatasetOp, std::shared_ptr<CsvOp>>(*m, "CsvOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
// Registers the CocoOp class with the Python module together with two static helpers:
// "get_class_indexing" and "get_num_rows".
PYBIND_REGISTER(CocoOp, 1, ([](const py::module *m) {
  // Returns the (name, int32 list) pairs filled in by CocoOp::GetClassIndexing.
  auto class_indexing = [](const std::string &dir, const std::string &file, const std::string &task) {
    std::vector<std::pair<std::string, std::vector<int32_t>>> output_class_indexing;
    THROW_IF_ERROR(CocoOp::GetClassIndexing(dir, file, task, &output_class_indexing));
    return output_class_indexing;
  };

  // Returns the count written by CocoOp::CountTotalRows.
  auto count_rows = [](const std::string &dir, const std::string &file, const std::string &task) {
    int64_t num_rows = 0;
    THROW_IF_ERROR(CocoOp::CountTotalRows(dir, file, task, &num_rows));
    return num_rows;
  };

  (void)py::class_<CocoOp, DatasetOp, std::shared_ptr<CocoOp>>(*m, "CocoOp")
    .def_static("get_class_indexing", class_indexing)
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the ImageFolderOp class with the Python module together with two static helpers:
// "get_num_rows" and "get_num_classes". Both delegate to ImageFolderOp::CountRowsAndClasses and
// request only one of its two outputs by passing nullptr for the other.
PYBIND_REGISTER(ImageFolderOp, 1, ([](const py::module *m) {
  (void)py::class_<ImageFolderOp, DatasetOp, std::shared_ptr<ImageFolderOp>>(*m, "ImageFolderOp")
    .def_static("get_num_rows",
                [](const std::string &path) {
                  int64_t count = 0;
                  // Only the row count is requested; the class-count output pointer is nullptr.
                  THROW_IF_ERROR(ImageFolderOp::CountRowsAndClasses(path, {}, &count, nullptr, {}));
                  return count;
                })
    .def_static("get_num_classes",
                // class_index is taken by const reference so the map is not copied again
                // when the lambda is invoked.
                [](const std::string &path, const std::map<std::string, int32_t> &class_index) {
                  int64_t num_classes = 0;
                  // Only the class count is requested; the row-count output pointer is nullptr.
                  THROW_IF_ERROR(ImageFolderOp::CountRowsAndClasses(path, {}, nullptr, &num_classes, class_index));
                  return num_classes;
                });
}));
|
||||
|
||||
// Registers the ManifestOp class with the Python module; no methods are attached here.
PYBIND_REGISTER(ManifestOp, 1, ([](const py::module *m) {
  using ManifestOpBinding = py::class_<ManifestOp, DatasetOp, std::shared_ptr<ManifestOp>>;
  (void)ManifestOpBinding(*m, "ManifestOp");
}));
|
||||
// Registers the MindRecordOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(MindRecordOp, 1, ([](const py::module *m) {
  auto count_rows = [](const std::vector<std::string> &paths, bool load_dataset, const py::object &sampler,
                       const int64_t num_padded) {
    // The shard operator stays null unless the sampler object offers "create_for_minddataset".
    std::shared_ptr<mindrecord::ShardOperator> op;
    if (py::hasattr(sampler, "create_for_minddataset")) {
      op = sampler.attr("create_for_minddataset")().cast<std::shared_ptr<mindrecord::ShardOperator>>();
    }

    int64_t num_rows = 0;
    THROW_IF_ERROR(MindRecordOp::CountTotalRows(paths, load_dataset, op, &num_rows, num_padded));
    return num_rows;
  };

  (void)py::class_<MindRecordOp, DatasetOp, std::shared_ptr<MindRecordOp>>(*m, "MindRecordOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the MnistOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(MnistOp, 1, ([](const py::module *m) {
  // Forwards to MnistOp::CountTotalRows and returns the value it writes into the out-parameter.
  auto count_rows = [](const std::string &dir, const std::string &usage) {
    int64_t num_rows = 0;
    THROW_IF_ERROR(MnistOp::CountTotalRows(dir, usage, &num_rows));
    return num_rows;
  };

  (void)py::class_<MnistOp, DatasetOp, std::shared_ptr<MnistOp>>(*m, "MnistOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the TextFileOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(TextFileOp, 1, ([](const py::module *m) {
  // Converts the Python list into file names, then asks TextFileOp::CountAllFileRows for the total.
  auto count_rows = [](const py::list &files) {
    std::vector<std::string> filenames;
    for (auto file : files) {
      // A None entry is stored as an empty file name; anything else is converted with py::str.
      if (file.is_none()) {
        (void)filenames.emplace_back("");
      } else {
        filenames.push_back(py::str(file));
      }
    }

    int64_t num_rows = 0;
    THROW_IF_ERROR(TextFileOp::CountAllFileRows(filenames, &num_rows));
    return num_rows;
  };

  (void)py::class_<TextFileOp, DatasetOp, std::shared_ptr<TextFileOp>>(*m, "TextFileOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the TFReaderOp class with the Python module and attaches a static "get_num_rows" helper.
PYBIND_REGISTER(TFReaderOp, 1, ([](const py::module *m) {
  // Converts the Python list into file names, then asks TFReaderOp::CountTotalRows for the total.
  // NOTE(review): no py::arg default is registered for `estimate`, so the C++ default below is
  // presumably not visible to Python callers - confirm against the pybind11 documentation.
  auto count_rows = [](const py::list &files, int64_t numParallelWorkers, bool estimate = false) {
    std::vector<std::string> filenames;
    for (auto entry : files) {
      // A None entry is stored as an empty file name; anything else is converted with py::str.
      if (entry.is_none()) {
        (void)filenames.emplace_back("");
      } else {
        filenames.push_back(py::str(entry));
      }
    }

    int64_t num_rows = 0;
    THROW_IF_ERROR(TFReaderOp::CountTotalRows(&num_rows, filenames, numParallelWorkers, estimate));
    return num_rows;
  };

  (void)py::class_<TFReaderOp, DatasetOp, std::shared_ptr<TFReaderOp>>(*m, "TFReaderOp")
    .def_static("get_num_rows", count_rows);
}));
|
||||
|
||||
// Registers the VOCOp class with the Python module and attaches a static "get_class_indexing" helper.
PYBIND_REGISTER(VOCOp, 1, ([](const py::module *m) {
  // Returns the name -> int32 map filled in by VOCOp::GetClassIndexing; the Python dict is passed through.
  auto class_indexing = [](const std::string &dir, const std::string &task_type, const std::string &task_mode,
                           const py::dict &dict) {
    std::map<std::string, int32_t> output_class_indexing;
    THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, &output_class_indexing));
    return output_class_indexing;
  };

  (void)py::class_<VOCOp, DatasetOp, std::shared_ptr<VOCOp>>(*m, "VOCOp")
    .def_static("get_class_indexing", class_indexing);
}));
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
@ -24,4 +24,4 @@ namespace audio {} // namespace audio
|
|||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
|
|
@ -719,9 +719,9 @@ class RandomCropDecodeResize(ImageTensorOperation):
|
|||
size (Union[int, sequence]): The size of the output image.
|
||||
If size is an integer, a square crop of size (size, size) is returned.
|
||||
If size is a sequence of length 2, it should be (height, width).
|
||||
scale (tuple, optional): Range [min, max) of respective size of the
|
||||
scale (list, tuple, optional): Range [min, max) of respective size of the
|
||||
original size to be cropped (default=(0.08, 1.0)).
|
||||
ratio (tuple, optional): Range [min, max) of aspect ratio to be
|
||||
ratio (list, tuple, optional): Range [min, max) of aspect ratio to be
|
||||
cropped (default=(3. / 4., 4. / 3.)).
|
||||
interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
|
||||
It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
|
||||
|
@ -914,9 +914,9 @@ class RandomResizedCrop(ImageTensorOperation):
|
|||
size (Union[int, sequence]): The size of the output image.
|
||||
If size is an integer, a square crop of size (size, size) is returned.
|
||||
If size is a sequence of length 2, it should be (height, width).
|
||||
scale (tuple, optional): Range [min, max) of respective size of the original
|
||||
scale (list, tuple, optional): Range [min, max) of respective size of the original
|
||||
size to be cropped (default=(0.08, 1.0)).
|
||||
ratio (tuple, optional): Range [min, max) of aspect ratio to be cropped
|
||||
ratio (list, tuple, optional): Range [min, max) of aspect ratio to be cropped
|
||||
(default=(3. / 4., 4. / 3.)).
|
||||
interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
|
||||
It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
|
||||
|
@ -968,9 +968,9 @@ class RandomResizedCropWithBBox(ImageTensorOperation):
|
|||
size (Union[int, sequence]): The size of the output image.
|
||||
If size is an integer, a square crop of size (size, size) is returned.
|
||||
If size is a sequence of length 2, it should be (height, width).
|
||||
scale (tuple, optional): Range (min, max) of respective size of the original
|
||||
scale (list, tuple, optional): Range (min, max) of respective size of the original
|
||||
size to be cropped (default=(0.08, 1.0)).
|
||||
ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped
|
||||
ratio (list, tuple, optional): Range (min, max) of aspect ratio to be cropped
|
||||
(default=(3. / 4., 4. / 3.)).
|
||||
interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
|
||||
It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
|
||||
|
@ -1390,9 +1390,9 @@ class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation):
|
|||
size (Union[int, sequence]): The size of the output image.
|
||||
If size is an integer, a square crop of size (size, size) is returned.
|
||||
If size is a sequence of length 2, it should be (height, width).
|
||||
scale (tuple, optional): Range [min, max) of respective size of the
|
||||
scale (list, tuple, optional): Range [min, max) of respective size of the
|
||||
original size to be cropped (default=(0.08, 1.0)).
|
||||
ratio (tuple, optional): Range [min, max) of aspect ratio to be
|
||||
ratio (list, tuple, optional): Range [min, max) of aspect ratio to be
|
||||
cropped (default=(3. / 4., 4. / 3.)).
|
||||
max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10).
|
||||
If exceeded, fall back to use center_crop instead.
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "minddata/dataset/include/dataset/execute.h"
|
||||
#include "minddata/dataset/include/dataset/transforms.h"
|
||||
#include "minddata/dataset/include/dataset/vision.h"
|
||||
#include "minddata/dataset/include/dataset/text.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
|
@ -206,3 +207,42 @@ TEST_F(MindDataTestExecute, TestTransformDecodeResizeCenterCrop1) {
|
|||
ASSERT_EQ(image.Shape()[1], 224);
|
||||
ASSERT_EQ(image.Shape()[2], 224);
|
||||
}
|
||||
|
||||
// Runs Decode on an image file, then a UniformAugment built from Resize, RandomVerticalFlip and
// RandomHorizontalFlip (num_ops = 3) through Execute, and checks that both calls report success.
TEST_F(MindDataTestExecute, TestUniformAugment) {
  // Read images
  auto image = ReadFileToTensor("data/dataset/apple.jpg");
  std::vector<mindspore::MSTensor> image2;

  // Transform params. Every transform is created with std::make_shared so that no naked `new`
  // is needed and all operations are constructed the same way.
  std::shared_ptr<TensorTransform> decode = std::make_shared<vision::Decode>();
  std::shared_ptr<TensorTransform> resize_op = std::make_shared<vision::Resize>(std::vector<int32_t>{16, 16});
  std::shared_ptr<TensorTransform> vertical = std::make_shared<vision::RandomVerticalFlip>();
  std::shared_ptr<TensorTransform> horizontal = std::make_shared<vision::RandomHorizontalFlip>();

  // Candidate operations handed to UniformAugment.
  const std::vector<std::shared_ptr<TensorTransform>> candidates = {resize_op, vertical, horizontal};
  std::shared_ptr<TensorTransform> uniform_op = std::make_shared<vision::UniformAugment>(candidates, 3);

  // Decode in place first; UniformAugment is then applied to the decoded tensor.
  auto transform1 = Execute({decode});
  Status rc = transform1(image, &image);
  ASSERT_TRUE(rc.IsOk());

  // The vector overload is used to receive the result of UniformAugment.
  auto transform2 = Execute({uniform_op});
  rc = transform2({image}, &image2);
  ASSERT_TRUE(rc.IsOk());
}
|
||||
|
||||
// Runs text::BasicTokenizer on a scalar string tensor through Execute and checks that the call
// succeeds and yields three output tensors.
TEST_F(MindDataTestExecute, TestBasicTokenizer) {
  // Build the scalar string input; stop immediately if the tensor cannot be created.
  std::shared_ptr<Tensor> de_tensor;
  Status create_rc = Tensor::CreateScalar<std::string>("Welcome to China.", &de_tensor);
  ASSERT_TRUE(create_rc.IsOk());
  auto txt = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
  std::vector<mindspore::MSTensor> txt_result;

  // Transform params
  std::shared_ptr<TensorTransform> tokenizer =
    std::make_shared<text::BasicTokenizer>(false, false, NormalizeForm::kNone, false, true);

  // BasicTokenizer has 3 outputs so we need a vector to receive its result
  auto transform1 = Execute({tokenizer});
  Status rc = transform1({txt}, &txt_result);
  // Check the status before the output size, so a failed run is reported as such rather than
  // as a size mismatch.
  ASSERT_TRUE(rc.IsOk());
  ASSERT_EQ(txt_result.size(), 3);
}
|
||||
|
|
Loading…
Reference in New Issue