del: column_order in map
This commit is contained in:
parent a60615e409
commit 911cbd7f6a
@ -26,7 +26,6 @@ mindspore.dataset.Dataset.map

- **operations** (Union[list[TensorOperation], list[functions]]) - A list of data augmentation operations; dataset augmentation operators and user-defined Python callable objects are both supported. The map operation applies the operations to the dataset object in order.
- **input_columns** (Union[str, list[str]], optional) - Input data columns of the first data augmentation operation. The length of this list must match the number of input columns expected by the first operation in `operations`. Default: None, all data columns are passed to the first operation.
- **output_columns** (Union[str, list[str]], optional) - Output data columns of the last data augmentation operation. This parameter must be specified when the length of `input_columns` is not equal to the length of `output_columns`. The length of this list must match the number of output columns of the last operation. Default: None, the output columns keep the same names as the input columns.
- **column_order** (Union[str, list[str]], optional) - Specifies the order of the data columns passed to the next dataset operation. This parameter must be specified when the length of `input_columns` is not equal to the length of `output_columns`. Note: the column names here are not limited to those specified in `input_columns` and `output_columns`; they can also be unprocessed data columns output by the previous operation. Default: None, keep the original input order.
- **num_parallel_workers** (int, optional) - Number of processes/threads used to run the map operation in parallel to speed up processing. Default: None, use the concurrency set by `set_num_parallel_workers`.
- **\*\*kwargs** - Other parameters.
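
The removed column_order parameter is superseded by a separate project step. A minimal sketch of the migration, assuming a dataset with "image" and "label" columns; the path and the c_vision import are placeholders:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    dataset = ds.ImageFolderDataset("/path/to/images")   # columns: "image", "label"
    decode_op = c_vision.Decode(rgb=True)

    # Before this commit: column order was controlled inside map()
    # dataset = dataset.map(operations=[decode_op], input_columns=["image"],
    #                       column_order=["label", "image"])

    # After this commit: map() no longer accepts column_order;
    # select and reorder columns with project() instead.
    dataset = dataset.map(operations=[decode_op], input_columns=["image"])
    dataset = dataset.project(["label", "image"])
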
@ -168,18 +168,11 @@ TensorRow FuncPtrConverter(const std::function<MSTensorVec(MSTensorVec)> &func,
|
|||
}
|
||||
|
||||
// Function to create the iterator, which will build and launch the execution tree.
|
||||
std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(const std::vector<std::vector<char>> &columns,
|
||||
int32_t num_epochs) {
|
||||
std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(int32_t num_epochs) {
|
||||
std::shared_ptr<Iterator> iter;
|
||||
try {
|
||||
auto ds = shared_from_this();
|
||||
|
||||
// The specified columns will be selected from the dataset and passed down the pipeline
|
||||
// in the order specified, other columns will be discarded.
|
||||
if (!VectorCharToString(columns).empty()) {
|
||||
ds = ds->Project(VectorCharToString(columns));
|
||||
}
|
||||
|
||||
iter = std::make_shared<Iterator>();
|
||||
Status rc = iter->BuildAndLaunchTree(ds, num_epochs);
|
||||
if (rc.IsError()) {
|
||||
|
@ -195,15 +188,8 @@ std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(const std::vector<std::v
|
|||
}
|
||||
|
||||
// Function to create the iterator, which will build and launch the execution tree.
|
||||
std::shared_ptr<PullIterator> Dataset::CreatePullBasedIterator(const std::vector<std::vector<char>> &columns) {
|
||||
// The specified columns will be selected from the dataset and passed down the pipeline
|
||||
// in the order specified, other columns will be discarded.
|
||||
// This code is not in a try/catch block because there is no execution tree class that will be created.
|
||||
std::shared_ptr<PullIterator> Dataset::CreatePullBasedIterator() {
|
||||
auto ds = shared_from_this();
|
||||
if (!VectorCharToString(columns).empty()) {
|
||||
ds = ds->Project(VectorCharToString(columns));
|
||||
}
|
||||
|
||||
std::shared_ptr<PullIterator> iter = std::make_shared<PullIterator>();
|
||||
Status rc = iter->BuildAndLaunchTree(ds, 1);
|
||||
if (rc.IsError()) {
|
||||
|
@ -498,16 +484,13 @@ FilterDataset::FilterDataset(const std::shared_ptr<Dataset> &input,
|
|||
MapDataset::MapDataset(const std::shared_ptr<Dataset> &input,
|
||||
const std::vector<std::shared_ptr<TensorOperation>> &operations,
|
||||
const std::vector<std::vector<char>> &input_columns,
|
||||
const std::vector<std::vector<char>> &output_columns,
|
||||
const std::vector<std::vector<char>> &project_columns,
|
||||
const std::shared_ptr<DatasetCache> &cache,
|
||||
const std::vector<std::vector<char>> &output_columns, const std::shared_ptr<DatasetCache> &cache,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks) {
|
||||
if (input == nullptr) {
|
||||
ir_node_ = nullptr;
|
||||
} else {
|
||||
auto ds = std::make_shared<MapNode>(input->IRNode(), operations, VectorCharToString(input_columns),
|
||||
VectorCharToString(output_columns), VectorCharToString(project_columns), cache,
|
||||
callbacks);
|
||||
VectorCharToString(output_columns), cache, callbacks);
|
||||
|
||||
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
|
||||
}
|
||||
|
|
|
@ -191,19 +191,19 @@ PYBIND_REGISTER(FilterNode, 2, ([](const py::module *m) {
|
|||
|
||||
PYBIND_REGISTER(MapNode, 2, ([](const py::module *m) {
|
||||
(void)py::class_<MapNode, DatasetNode, std::shared_ptr<MapNode>>(*m, "MapNode", "to create a MapNode")
|
||||
.def(py::init(
|
||||
[](const std::shared_ptr<DatasetNode> &self, const py::list &operations,
|
||||
const py::list &input_columns, const py::list &output_columns, const py::list &project_columns,
|
||||
std::vector<std::shared_ptr<PyDSCallback>> py_callbacks, int64_t max_rowsize,
|
||||
const ManualOffloadMode offload, std::shared_ptr<PythonMultiprocessingRuntime> python_mp) {
|
||||
auto map = std::make_shared<MapNode>(
|
||||
self, std::move(toTensorOperations(operations)), toStringVector(input_columns),
|
||||
toStringVector(output_columns), toStringVector(project_columns), nullptr,
|
||||
std::vector<std::shared_ptr<DSCallback>>(py_callbacks.begin(), py_callbacks.end()), offload,
|
||||
python_mp);
|
||||
THROW_IF_ERROR(map->ValidateParams());
|
||||
return map;
|
||||
}));
|
||||
.def(py::init([](const std::shared_ptr<DatasetNode> &self, const py::list &operations,
|
||||
const py::list &input_columns, const py::list &output_columns,
|
||||
std::vector<std::shared_ptr<PyDSCallback>> &py_callbacks, int64_t max_rowsize,
|
||||
const ManualOffloadMode &offload,
|
||||
std::shared_ptr<PythonMultiprocessingRuntime> &python_mp) {
|
||||
auto map = std::make_shared<MapNode>(
|
||||
self, std::move(toTensorOperations(operations)), toStringVector(input_columns),
|
||||
toStringVector(output_columns), nullptr,
|
||||
std::vector<std::shared_ptr<DSCallback>>(py_callbacks.begin(), py_callbacks.end()), offload,
|
||||
python_mp);
|
||||
THROW_IF_ERROR(map->ValidateParams());
|
||||
return map;
|
||||
}));
|
||||
}));
|
||||
|
||||
PYBIND_REGISTER(PythonMultiprocessingRuntime, 1, ([](const py::module *m) {
|
||||
|
|
|
@ -34,13 +34,11 @@ namespace dataset {
|
|||
|
||||
MapNode::MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
std::vector<std::string> input_columns, std::vector<std::string> output_columns,
|
||||
const std::vector<std::string> &project_columns, std::shared_ptr<DatasetCache> cache,
|
||||
std::vector<std::shared_ptr<DSCallback>> callbacks, ManualOffloadMode offload,
|
||||
std::shared_ptr<PythonMultiprocessingRuntime> python_mp)
|
||||
std::shared_ptr<DatasetCache> cache, std::vector<std::shared_ptr<DSCallback>> callbacks,
|
||||
ManualOffloadMode offload, std::shared_ptr<PythonMultiprocessingRuntime> python_mp)
|
||||
: operations_(operations),
|
||||
input_columns_(input_columns),
|
||||
output_columns_(output_columns),
|
||||
project_columns_(project_columns),
|
||||
DatasetNode(std::move(cache)),
|
||||
callbacks_(callbacks),
|
||||
offload_(offload),
|
||||
|
@ -50,8 +48,8 @@ MapNode::MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr
|
|||
|
||||
std::shared_ptr<DatasetNode> MapNode::Copy() {
|
||||
std::vector<std::shared_ptr<TensorOperation>> operations = operations_;
|
||||
auto node = std::make_shared<MapNode>(nullptr, operations, input_columns_, output_columns_, project_columns_, cache_,
|
||||
callbacks_, offload_, python_mp_);
|
||||
auto node = std::make_shared<MapNode>(nullptr, operations, input_columns_, output_columns_, cache_, callbacks_,
|
||||
offload_, python_mp_);
|
||||
(void)node->SetNumWorkers(num_workers_);
|
||||
(void)node->SetConnectorQueueSize(connector_que_size_);
|
||||
return node;
|
||||
|
@ -59,7 +57,7 @@ std::shared_ptr<DatasetNode> MapNode::Copy() {
|
|||
|
||||
void MapNode::Print(std::ostream &out) const {
|
||||
out << (Name() + "(<ops>" + ",input:" + PrintColumns(input_columns_) + ",output:" + PrintColumns(output_columns_) +
|
||||
",<project_cols>" + ",num_tensor_ops:")
|
||||
",num_tensor_ops:")
|
||||
<< operations_.size() << ",...)";
|
||||
}
|
||||
|
||||
|
@ -91,12 +89,6 @@ Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
|||
map_op->AddCallbacks(callbacks_);
|
||||
}
|
||||
|
||||
if (!project_columns_.empty()) {
|
||||
auto project_op = std::make_shared<ProjectOp>(project_columns_);
|
||||
project_op->SetTotalRepeats(GetTotalRepeats());
|
||||
project_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(project_op);
|
||||
}
|
||||
map_op->SetTotalRepeats(GetTotalRepeats());
|
||||
map_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
|
||||
if (python_mp_ != nullptr) {
|
||||
|
@ -128,10 +120,6 @@ Status MapNode::ValidateParams() {
|
|||
RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "output_columns", output_columns_));
|
||||
}
|
||||
|
||||
if (!project_columns_.empty()) {
|
||||
RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "project_columns", project_columns_));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -165,7 +153,6 @@ Status MapNode::to_json(nlohmann::json *out_json) {
|
|||
args["connector_queue_size"] = connector_que_size_;
|
||||
args["input_columns"] = input_columns_;
|
||||
args["output_columns"] = output_columns_;
|
||||
args["project_columns"] = project_columns_;
|
||||
if (cache_ != nullptr) {
|
||||
nlohmann::json cache_args;
|
||||
RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
|
||||
|
@ -202,14 +189,12 @@ Status MapNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode>
|
|||
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "connector_queue_size", kMapNode));
|
||||
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "input_columns", kMapNode));
|
||||
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "output_columns", kMapNode));
|
||||
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "project_columns", kMapNode));
|
||||
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "operations", kMapNode));
|
||||
std::vector<std::string> input_columns = json_obj["input_columns"];
|
||||
std::vector<std::string> output_columns = json_obj["output_columns"];
|
||||
std::vector<std::string> project_columns = json_obj["project_columns"];
|
||||
std::vector<std::shared_ptr<TensorOperation>> operations;
|
||||
RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(json_obj["operations"], &operations));
|
||||
*result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns, project_columns);
|
||||
*result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns);
|
||||
(void)(*result)->SetNumWorkers(json_obj["num_parallel_workers"]);
|
||||
(void)(*result)->SetConnectorQueueSize(json_obj["connector_queue_size"]);
|
||||
return Status::OK();
|
||||
|
|
|
@ -33,8 +33,7 @@ class MapNode : public DatasetNode {
|
|||
/// \brief Constructor
|
||||
MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
std::vector<std::string> input_columns = {}, std::vector<std::string> output_columns = {},
|
||||
const std::vector<std::string> &columns = {}, std::shared_ptr<DatasetCache> cache = nullptr,
|
||||
std::vector<std::shared_ptr<DSCallback>> callbacks = {},
|
||||
std::shared_ptr<DatasetCache> cache = nullptr, std::vector<std::shared_ptr<DSCallback>> callbacks = {},
|
||||
ManualOffloadMode offload = ManualOffloadMode::kUnspecified,
|
||||
std::shared_ptr<PythonMultiprocessingRuntime> python_mp = nullptr);
|
||||
|
||||
|
@ -89,7 +88,6 @@ class MapNode : public DatasetNode {
|
|||
const auto &TensorOperations() const { return operations_; }
|
||||
const std::vector<std::string> &InputColumns() const { return input_columns_; }
|
||||
const std::vector<std::string> &OutputColumns() const { return output_columns_; }
|
||||
const std::vector<std::string> &ProjectColumns() const { return project_columns_; }
|
||||
const std::vector<std::shared_ptr<DSCallback>> &Callbacks() const { return callbacks_; }
|
||||
ManualOffloadMode GetOffload() const { return offload_; }
|
||||
|
||||
|
@ -124,7 +122,6 @@ class MapNode : public DatasetNode {
|
|||
std::vector<std::shared_ptr<TensorOperation>> operations_;
|
||||
std::vector<std::string> input_columns_;
|
||||
std::vector<std::string> output_columns_;
|
||||
std::vector<std::string> project_columns_;
|
||||
std::vector<std::shared_ptr<DSCallback>> callbacks_;
|
||||
|
||||
/// \brief ManualOffloadMode to indicate manual_offload status
|
||||
|
|
|
@ -140,7 +140,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);
|
||||
|
||||
/// \brief A function to create a PullBasedIterator over the Dataset.
|
||||
/// \param[in] columns List of columns to be used to specify the order of columns.
|
||||
/// \return Shared pointer to the Iterator.
|
||||
/// \par Example
|
||||
/// \code
|
||||
|
@ -149,10 +148,9 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
/// iter->GetNextRow(&row);
|
||||
/// \endcode
|
||||
std::shared_ptr<PullIterator> CreatePullBasedIterator(const std::vector<std::vector<char>> &columns = {});
|
||||
std::shared_ptr<PullIterator> CreatePullBasedIterator();
|
||||
|
||||
/// \brief Function to create an Iterator over the Dataset pipeline.
|
||||
/// \param[in] columns List of columns to be used to specify the order of columns.
|
||||
/// \param[in] num_epochs Number of epochs to run through the pipeline (default=-1, which means infinite epochs).
|
||||
/// An empty row is returned at the end of each epoch.
|
||||
/// \return Shared pointer to the Iterator.
|
||||
|
@ -163,9 +161,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
/// iter->GetNextRow(&row);
|
||||
/// \endcode
|
||||
std::shared_ptr<Iterator> CreateIterator(const std::vector<std::string> &columns = {}, int32_t num_epochs = -1) {
|
||||
return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs);
|
||||
}
|
||||
std::shared_ptr<Iterator> CreateIterator(int32_t num_epochs = -1) { return CreateIteratorCharIF(num_epochs); }
|
||||
|
||||
/// \brief Function to transfer data through a device.
|
||||
/// \note If device is Ascend, features of data will be transferred one by one. The limitation
|
||||
|
@ -385,7 +381,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced.
|
||||
/// \param[in] project_columns A list of column names to project.
|
||||
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current Dataset.
|
||||
|
@ -397,36 +392,24 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
///
|
||||
/// /* 1) Simple map example */
|
||||
/// // Apply decode_op on column "image". This column will be replaced by the outputted
|
||||
/// // column of decode_op. Since column_order is not provided, both columns "image"
|
||||
/// // and "label" will be propagated to the child node in their original order.
|
||||
/// // column of decode_op.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"});
|
||||
///
|
||||
/// // Decode and rename column "image" to "decoded_image".
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"});
|
||||
///
|
||||
/// // Specify the order of the output columns.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {}, {"label", "image"});
|
||||
///
|
||||
/// // Rename column "image" to "decoded_image" and also specify the order of the output columns.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"label", "decoded_image"});
|
||||
///
|
||||
/// // Rename column "image" to "decoded_image" and keep only this column.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"decoded_image"});
|
||||
///
|
||||
/// /* 2) Map example with more than one operation */
|
||||
/// // Create a dataset where the images are decoded, then randomly color jittered.
|
||||
/// // decode_op takes column "image" as input and outputs one column. The column
|
||||
/// // outputted by decode_op is passed as input to random_jitter_op.
|
||||
/// // random_jitter_op will output one column. Column "image" will be replaced by
|
||||
/// // the column outputted by random_jitter_op (the very last operation). All other
|
||||
/// // columns are unchanged. Since column_order is not specified, the order of the
|
||||
/// // columns will remain the same.
|
||||
/// // columns are unchanged.
|
||||
/// dataset = dataset->Map({decode_op, random_jitter_op}, {"image"})
|
||||
/// \endcode
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<TensorTransform *> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
|
@ -434,8 +417,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
operations.begin(), operations.end(), std::back_inserter(transform_ops),
|
||||
[](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a MapDataset.
|
||||
|
@ -451,14 +433,12 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced.
|
||||
/// \param[in] project_columns A list of column names to project.
|
||||
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current Dataset.
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<std::shared_ptr<TensorTransform>> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
|
@ -467,8 +447,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
return op != nullptr ? op->Parse() : nullptr;
|
||||
});
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a MapDataset.
|
||||
|
@ -484,22 +463,19 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced.
|
||||
/// \param[in] project_columns A list of column names to project.
|
||||
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current Dataset.
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<std::reference_wrapper<TensorTransform>> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
(void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops),
|
||||
[](TensorTransform &op) -> std::shared_ptr<TensorOperation> { return op.Parse(); });
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a Project Dataset.
|
||||
|
@ -617,7 +593,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> GetClassIndexingCharIF();
|
||||
|
||||
// Char interface(CharIF) of CreateIterator
|
||||
std::shared_ptr<Iterator> CreateIteratorCharIF(const std::vector<std::vector<char>> &columns, int32_t num_epochs);
|
||||
std::shared_ptr<Iterator> CreateIteratorCharIF(int32_t num_epochs);
|
||||
|
||||
// Char interface(CharIF) of DeviceQueue
|
||||
bool DeviceQueueCharIF(const std::vector<char> &queue_name, const std::vector<char> &device_type, int32_t device_id,
|
||||
|
@ -866,13 +842,11 @@ class DATASET_API MapDataset : public Dataset {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced.
|
||||
/// \param[in] project_columns A list of column names to project.
|
||||
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
MapDataset(const std::shared_ptr<Dataset> &input, const std::vector<std::shared_ptr<TensorOperation>> &operations,
|
||||
const std::vector<std::vector<char>> &input_columns, const std::vector<std::vector<char>> &output_columns,
|
||||
const std::vector<std::vector<char>> &project_columns, const std::shared_ptr<DatasetCache> &cache,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks);
|
||||
const std::shared_ptr<DatasetCache> &cache, const std::vector<std::shared_ptr<DSCallback>> &callbacks);
|
||||
|
||||
/// \brief Destructor of MapDataset.
|
||||
~MapDataset() override = default;
|
||||
|
|
|
@ -122,7 +122,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);
|
||||
|
||||
/// \brief Function to create a PullBasedIterator over the Dataset
|
||||
/// \param[in] columns List of columns to be used to specify the order of columns
|
||||
/// \return Shared pointer to the Iterator
|
||||
/// \par Example
|
||||
/// \code
|
||||
|
@ -131,10 +130,9 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
/// iter->GetNextRow(&row);
|
||||
/// \endcode
|
||||
std::shared_ptr<PullIterator> CreatePullBasedIterator(const std::vector<std::vector<char>> &columns = {});
|
||||
std::shared_ptr<PullIterator> CreatePullBasedIterator();
|
||||
|
||||
/// \brief Function to create an Iterator over the Dataset pipeline
|
||||
/// \param[in] columns List of columns to be used to specify the order of columns
|
||||
/// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs.
|
||||
/// An empty row is returned at the end of each epoch
|
||||
/// \return Shared pointer to the Iterator
|
||||
|
@ -145,9 +143,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
/// iter->GetNextRow(&row);
|
||||
/// \endcode
|
||||
std::shared_ptr<Iterator> CreateIterator(const std::vector<std::string> &columns = {}, int32_t num_epochs = -1) {
|
||||
return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs);
|
||||
}
|
||||
std::shared_ptr<Iterator> CreateIterator(int32_t num_epochs = -1) { return CreateIteratorCharIF(num_epochs); }
|
||||
|
||||
/// \brief Function to transfer data through a device.
|
||||
/// \note If device is Ascend, features of data will be transferred one by one. The limitation
|
||||
|
@ -221,7 +217,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced
|
||||
/// \param[in] project_columns A list of column names to project
|
||||
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current MapDataset
|
||||
|
@ -233,36 +228,24 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
///
|
||||
/// /* 1) Simple map example */
|
||||
/// // Apply decode_op on column "image". This column will be replaced by the outputted
|
||||
/// // column of decode_op. Since column_order is not provided, both columns "image"
|
||||
/// // and "label" will be propagated to the child node in their original order.
|
||||
/// // column of decode_op.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"});
|
||||
///
|
||||
/// // Decode and rename column "image" to "decoded_image".
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"});
|
||||
///
|
||||
/// // Specify the order of the output columns.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {}, {"label", "image"});
|
||||
///
|
||||
/// // Rename column "image" to "decoded_image" and also specify the order of the output columns.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"label", "decoded_image"});
|
||||
///
|
||||
/// // Rename column "image" to "decoded_image" and keep only this column.
|
||||
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"decoded_image"});
|
||||
///
|
||||
/// /* 2) Map example with more than one operation */
|
||||
/// // Create a dataset where the images are decoded, then randomly color jittered.
|
||||
/// // decode_op takes column "image" as input and outputs one column. The column
|
||||
/// // outputted by decode_op is passed as input to random_jitter_op.
|
||||
/// // random_jitter_op will output one column. Column "image" will be replaced by
|
||||
/// // the column outputted by random_jitter_op (the very last operation). All other
|
||||
/// // columns are unchanged. Since column_order is not specified, the order of the
|
||||
/// // columns will remain the same.
|
||||
/// // columns are unchanged.
|
||||
/// dataset = dataset->Map({decode_op, random_jitter_op}, {"image"})
|
||||
/// \endcode
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<TensorTransform *> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
|
@ -270,8 +253,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
operations.begin(), operations.end(), std::back_inserter(transform_ops),
|
||||
[](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a MapDataset
|
||||
|
@ -287,14 +269,12 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced
|
||||
/// \param[in] project_columns A list of column names to project
|
||||
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current MapDataset
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<std::shared_ptr<TensorTransform>> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
|
@ -303,8 +283,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
return op != nullptr ? op->Parse() : nullptr;
|
||||
});
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a MapDataset
|
||||
|
@ -320,22 +299,19 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced
|
||||
/// \param[in] project_columns A list of column names to project
|
||||
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
|
||||
/// \param[in] callbacks List of Dataset callbacks to be called.
|
||||
/// \return Shared pointer to the current MapDataset
|
||||
std::shared_ptr<MapDataset> Map(const std::vector<std::reference_wrapper<TensorTransform>> &operations,
|
||||
const std::vector<std::string> &input_columns = {},
|
||||
const std::vector<std::string> &output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {},
|
||||
const std::shared_ptr<DatasetCache> &cache = nullptr,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
|
||||
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
|
||||
(void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops),
|
||||
[](TensorTransform &op) -> std::shared_ptr<TensorOperation> { return op.Parse(); });
|
||||
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
|
||||
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
|
||||
callbacks);
|
||||
VectorStringToChar(output_columns), cache, callbacks);
|
||||
}
|
||||
|
||||
/// \brief Function to create a Project Dataset
|
||||
|
@ -380,7 +356,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> GetClassIndexingCharIF();
|
||||
|
||||
// Char interface(CharIF) of CreateIterator
|
||||
std::shared_ptr<Iterator> CreateIteratorCharIF(const std::vector<std::vector<char>> &columns, int32_t num_epochs);
|
||||
std::shared_ptr<Iterator> CreateIteratorCharIF(int32_t num_epochs);
|
||||
|
||||
// Char interface(CharIF) of DeviceQueue
|
||||
bool DeviceQueueCharIF(const std::vector<char> &queue_name, const std::vector<char> &device_type, int32_t device_id,
|
||||
|
@ -503,8 +479,7 @@ class DATASET_API MapDataset : public Dataset {
|
|||
public:
|
||||
MapDataset(const std::shared_ptr<Dataset> &input, const std::vector<std::shared_ptr<TensorOperation>> &operations,
|
||||
const std::vector<std::vector<char>> &input_columns, const std::vector<std::vector<char>> &output_columns,
|
||||
const std::vector<std::vector<char>> &project_columns, const std::shared_ptr<DatasetCache> &cache,
|
||||
const std::vector<std::shared_ptr<DSCallback>> &callbacks);
|
||||
const std::shared_ptr<DatasetCache> &cache, const std::vector<std::shared_ptr<DSCallback>> &callbacks);
|
||||
|
||||
~MapDataset() override = default;
|
||||
};
|
||||
|
|
|
@ -839,9 +839,6 @@ class Dataset:
|
|||
len(output_columns). The size of this list must match the number of output
|
||||
columns of the last operation. (default=None, output columns will have the same
|
||||
name as the input columns, i.e., the columns will be replaced).
|
||||
column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
|
||||
dataset (default=None). The parameter is required when len(input_column) != len(output_column).
|
||||
Caution: the list here is not just the columns specified in parameter input_columns and output_columns.
|
||||
num_parallel_workers (int, optional): Number of threads used to process the dataset in
|
||||
parallel (default=None, the value from the configuration will be used).
|
||||
**kwargs:
|
||||
|
@ -871,6 +868,8 @@ class Dataset:
|
|||
|
||||
Examples:
|
||||
>>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
|
||||
>>> # image is of type bytes type which can be decoded to RGB
|
||||
>>> # label is of type int32
|
||||
>>>
|
||||
>>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
|
||||
>>> decode_op = c_vision.Decode(rgb=True)
|
||||
|
@ -879,30 +878,15 @@ class Dataset:
|
|||
>>>
|
||||
>>> # 1) Simple map example.
|
||||
>>>
|
||||
>>> # Apply decode_op on column "image". This column will be replaced by the outputted
|
||||
>>> # column of decode_op. Since column_order is not provided, both columns "image"
|
||||
>>> # and "label" will be propagated to the child node in their original order.
|
||||
>>> # Apply decode_op on column "image".
|
||||
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"])
|
||||
>>>
|
||||
>>> # Decode and rename column "image" to "decoded_image".
|
||||
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"], output_columns=["decoded_image"])
|
||||
>>>
|
||||
>>> # Specify the order of the output columns.
|
||||
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
|
||||
... output_columns=None, column_order=["label", "image"])
|
||||
>>>
|
||||
>>> # Rename column "image" to "decoded_image" and also specify the order of the output columns.
|
||||
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
|
||||
... output_columns=["decoded_image"], column_order=["label", "decoded_image"])
|
||||
>>>
|
||||
>>> # Rename column "image" to "decoded_image" and keep only this column.
|
||||
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
|
||||
... output_columns=["decoded_image"], column_order=["decoded_image"])
|
||||
>>>
|
||||
>>> # A simple example for mapping pyfunc. Renaming columns and specifying column order
|
||||
>>> # work in the same way as the previous examples.
|
||||
>>> # A simple example for user defined python function transform.
|
||||
>>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
|
||||
>>> dataset = dataset.map(operations=[(lambda x: x + 1)], input_columns=["data"])
|
||||
>>> dataset = dataset.map(operations=[(lambda x: x - 1)], input_columns=["data"])
|
||||
>>>
|
||||
>>> # 2) Map example with more than one operation.
|
||||
>>>
|
||||
|
@ -911,17 +895,14 @@ class Dataset:
|
|||
>>> # outputted by decode_op is passed as input to random_jitter_op.
|
||||
>>> # random_jitter_op will output one column. Column "image" will be replaced by
|
||||
>>> # the column outputted by random_jitter_op (the very last operation). All other
|
||||
>>> # columns are unchanged. Since column_order is not specified, the order of the
|
||||
>>> # columns will remain the same.
|
||||
>>> # columns are unchanged.
|
||||
>>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"])
|
||||
>>>
|
||||
>>> # Rename the column outputted by random_jitter_op to "image_mapped".
|
||||
>>> # Specifying column order works in the same way as examples in 1).
|
||||
>>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"],
|
||||
... output_columns=["image_mapped"])
|
||||
>>>
|
||||
>>> # Map with multiple operations using pyfunc. Renaming columns and specifying column order
|
||||
>>> # work in the same way as examples in 1).
|
||||
>>> # Map with multiple operations using pyfunc and rename column's name
|
||||
>>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
|
||||
>>> dataset = dataset.map(operations=[(lambda x: x * x), (lambda x: x - 1)], input_columns=["data"],
|
||||
... output_columns=["data_mapped"])
|
||||
|
@ -938,22 +919,9 @@ class Dataset:
|
|||
>>> operations = [(lambda x, y: (x, x + y, x + y + 1)),
|
||||
... (lambda x, y, z: x * y * z),
|
||||
... (lambda x: (x % 2, x % 3, x % 5, x % 7))]
|
||||
>>>
|
||||
>>> # Note: Since the number of input columns is not the same as the number of
|
||||
>>> # output columns, the output_columns and column_order parameters must be
|
||||
>>> # specified. Otherwise, this map call will also result in an error.
|
||||
>>>
|
||||
>>> dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])
|
||||
>>>
|
||||
>>> # Propagate all columns to the child node in this order:
|
||||
>>> dataset = dataset.map(operations, input_columns=["x", "y"],
|
||||
... output_columns=["mod2", "mod3", "mod5", "mod7"],
|
||||
... column_order=["mod2", "mod3", "mod5", "mod7"])
|
||||
>>>
|
||||
>>> # Propagate some columns to the child node in this order:
|
||||
>>> dataset = dataset.map(operations, input_columns=["x", "y"],
|
||||
... output_columns=["mod2", "mod3", "mod5", "mod7"],
|
||||
... column_order=["mod7", "mod3", "col2"])
|
||||
... output_columns=["mod2", "mod3", "mod5", "mod7"])
|
||||
"""
|
||||
if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True:
|
||||
num_parallel_workers = 1
|
||||
|
@ -962,12 +930,7 @@ class Dataset:
|
|||
"mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it "
|
||||
"with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.")
|
||||
|
||||
if column_order is not None:
|
||||
logger.warning("The parameter column_order will be deprecated in the future. "
|
||||
"Please use '.project' operation instead.")
|
||||
|
||||
return MapDataset(self, operations, input_columns, output_columns, column_order, num_parallel_workers,
|
||||
**kwargs)
|
||||
return MapDataset(self, operations, input_columns, output_columns, num_parallel_workers, **kwargs)
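
Because column_order is gone, a map whose operations change the number of columns needs only output_columns; column selection and ordering move to project(). A minimal sketch mirroring the multi-column docstring example above (column names are taken from that example):

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])
    operations = [(lambda x, y: (x, x + y, x + y + 1)),
                  (lambda x, y, z: x * y * z),
                  (lambda x: (x % 2, x % 3, x % 5, x % 7))]
    # Two input columns in, four output columns out; no column_order required any more.
    dataset = dataset.map(operations, input_columns=["x", "y"],
                          output_columns=["mod2", "mod3", "mod5", "mod7"])
    # Optionally keep or reorder only some of the resulting columns.
    dataset = dataset.project(["mod7", "mod3", "mod2"])
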
|
||||
|
||||
@check_filter
|
||||
def filter(self, predicate, input_columns=None, num_parallel_workers=None):
|
||||
|
@ -3330,9 +3293,6 @@ class MapDataset(UnionBaseDataset):
|
|||
The size of the list should match the number of outputs of the last operator
|
||||
(default=None, output columns will be the input columns, i.e., the columns will
|
||||
be replaced).
|
||||
column_order (list[str], optional): Specifies the list of all the columns you need in the whole
|
||||
dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
|
||||
is not just the columns specified in parameter input_columns and output_columns.
|
||||
num_parallel_workers (int, optional): Number of workers to process the dataset
|
||||
in parallel (default=None).
|
||||
python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
|
||||
|
@ -3343,12 +3303,9 @@ class MapDataset(UnionBaseDataset):
|
|||
max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
|
||||
data between processes. This is only used if python_multiprocessing is set to True (default=16).
|
||||
offload (bool, optional): Flag to indicate whether offload is used (Default=None).
|
||||
|
||||
Raises:
|
||||
ValueError: If len(input_columns) != len(output_columns) and column_order is not specified.
|
||||
"""
|
||||
|
||||
def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None, column_order=None,
|
||||
def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
|
||||
num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None, max_rowsize=16,
|
||||
offload=None):
|
||||
super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers, cache=cache)
|
||||
|
@ -3367,17 +3324,10 @@ class MapDataset(UnionBaseDataset):
|
|||
|
||||
self.input_columns = to_list(input_columns)
|
||||
self.output_columns = to_list(output_columns)
|
||||
self.column_order = replace_none(column_order, [])
|
||||
|
||||
# If output_columns were not provided then use input_columns
|
||||
self.output_columns = self.input_columns if not self.output_columns else self.output_columns
|
||||
|
||||
if self.input_columns and self.output_columns \
|
||||
and len(self.input_columns) != len(self.output_columns) \
|
||||
and not self.column_order:
|
||||
raise ValueError("When length of input_columns and output_columns are not equal,"
|
||||
" column_order must be specified.")
|
||||
|
||||
self.python_multiprocessing = python_multiprocessing
|
||||
self.process_pool = None
|
||||
|
||||
|
@ -3410,7 +3360,7 @@ class MapDataset(UnionBaseDataset):
|
|||
self.prepare_multiprocessing()
|
||||
|
||||
callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
|
||||
return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns, self.column_order,
|
||||
return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
|
||||
callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
|
||||
|
||||
def __deepcopy__(self, memodict):
|
||||
|
|
|
@ -1368,6 +1368,20 @@ def check_map(method):
|
|||
[operations, input_columns, output_columns, column_order, num_parallel_workers, param_dict], _ = \
|
||||
parse_user_args(method, *args, **kwargs)
|
||||
|
||||
if column_order is not None:
|
||||
raise ValueError("The parameter 'column_order' had been deleted in map operation. "
|
||||
"Please use '.project' operation instead.\n"
|
||||
">> # Usage of old api:\n"
|
||||
">> dataset = dataset.map(operations=PyFunc,\n"
|
||||
">> input_columns=[\"column_a\"],\n"
|
||||
">> output_columns=[\"column_b\", \"column_c\"],\n"
|
||||
">> column_order=[\"column_b\", \"column_c\"])\n"
|
||||
">> # Usage of new api:\n"
|
||||
">> dataset = dataset.map(operations=PyFunc,\n"
|
||||
">> input_columns=[\"column_a\"],\n"
|
||||
">> output_columns=[\"column_b\", \"column_c\"])\n"
|
||||
">> dataset = dataset.project([\"column_b\", \"column_c\"])")
|
||||
|
||||
(python_multiprocessing, max_rowsize, cache, callbacks, offload) = get_map_kwargs_from_dict(param_dict)
|
||||
|
||||
# check whether network computing operator exist in input operations(python function)
|
||||
|
@ -1394,10 +1408,8 @@ def check_map(method):
|
|||
raise ValueError("Input operations should not contain network computing operator like in "
|
||||
"mindspore.nn or mindspore.ops, got operation: ", str(item))
|
||||
|
||||
nreq_param_columns = ['input_columns', 'output_columns', 'column_order']
|
||||
nreq_param_columns = ['input_columns', 'output_columns']
|
||||
|
||||
if column_order is not None:
|
||||
type_check(column_order, (list,), "column_order")
|
||||
if num_parallel_workers is not None:
|
||||
check_num_parallel_workers(num_parallel_workers)
|
||||
type_check(python_multiprocessing, (bool,), "python_multiprocessing")
|
||||
|
@ -1413,7 +1425,7 @@ def check_map(method):
|
|||
else:
|
||||
type_check(callbacks, (DSCallback,), "callbacks")
|
||||
|
||||
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, column_order]):
|
||||
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns]):
|
||||
if param is not None:
|
||||
check_columns(param, param_name)
|
||||
if callbacks is not None:
|
||||
|
|
|
@ -131,8 +131,7 @@ class JiebaTokenizer(TextTensorOperation):
|
|||
>>> # ["offsets_limit", dtype=uint32]}
|
||||
>>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=True)
|
||||
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"],
|
||||
... column_order=["token", "offsets_start", "offsets_limit"])
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_jieba_init
|
||||
|
@ -629,8 +628,7 @@ class UnicodeCharTokenizer(TextTensorOperation):
|
|||
>>> # ["offsets_limit", dtype=uint32]}
|
||||
>>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=True)
|
||||
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"],
|
||||
... column_order=["token", "offsets_start", "offsets_limit"])
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_with_offsets
|
||||
|
@ -679,8 +677,7 @@ class WordpieceTokenizer(TextTensorOperation):
|
|||
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
|
||||
... max_bytes_per_token=100, with_offsets=True)
|
||||
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"],
|
||||
... column_order=["token", "offsets_start", "offsets_limit"])
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_wordpiece_tokenizer
|
||||
|
@ -766,9 +763,7 @@ if platform.system().lower() != 'windows':
|
|||
... with_offsets=True)
|
||||
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start",
|
||||
... "offsets_limit"],
|
||||
... column_order=["token", "offsets_start",
|
||||
... "offsets_limit"])
|
||||
... "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_basic_tokenizer
|
||||
|
@ -862,9 +857,7 @@ if platform.system().lower() != 'windows':
|
|||
... with_offsets=True)
|
||||
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start",
|
||||
... "offsets_limit"],
|
||||
... column_order=["token", "offsets_start",
|
||||
... "offsets_limit"])
|
||||
... "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_bert_tokenizer
|
||||
|
@ -1054,9 +1047,7 @@ if platform.system().lower() != 'windows':
|
|||
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=True)
|
||||
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start",
|
||||
... "offsets_limit"],
|
||||
... column_order=["token", "offsets_start",
|
||||
... "offsets_limit"])
|
||||
... "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_regex_tokenizer
|
||||
|
@ -1097,8 +1088,7 @@ if platform.system().lower() != 'windows':
|
|||
>>> # ["offsets_limit", dtype=uint32]}
|
||||
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
|
||||
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"],
|
||||
... column_order=["token", "offsets_start", "offsets_limit"])
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"])
|
||||
|
||||
"""
|
||||
|
||||
|
@ -1139,8 +1129,7 @@ if platform.system().lower() != 'windows':
|
|||
>>> # ["offsets_limit", dtype=uint32]}
|
||||
>>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
|
||||
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"],
|
||||
... column_order=["token", "offsets_start", "offsets_limit"])
|
||||
... output_columns=["token", "offsets_start", "offsets_limit"])
|
||||
"""
|
||||
|
||||
@check_with_offsets
|
||||
|
|
|
@ -430,8 +430,7 @@ class Duplicate(TensorOperation):
|
|||
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["x"])
|
||||
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.Duplicate(),
|
||||
... input_columns=["x"],
|
||||
... output_columns=["x", "y"],
|
||||
... column_order=["x", "y"])
|
||||
... output_columns=["x", "y"])
|
||||
>>> # Data after
|
||||
>>> # | x | y |
|
||||
>>> # +---------+---------+
|
||||
|
@ -477,8 +476,7 @@ class Unique(TensorOperation):
|
|||
>>> dataset = ds.NumpySlicesDataset(data, ["x"])
|
||||
>>> dataset = dataset.map(operations=c_transforms.Unique(),
|
||||
... input_columns=["x"],
|
||||
... output_columns=["x", "y", "z"],
|
||||
... column_order=["x", "y", "z"])
|
||||
... output_columns=["x", "y", "z"])
|
||||
>>> # Data after
|
||||
>>> # | x | y |z |
|
||||
>>> # +---------+-----------------+---------+
|
||||
|
|
|
@ -402,8 +402,7 @@ class Duplicate(TensorOperation):
|
|||
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["x"])
|
||||
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms.Duplicate(),
|
||||
... input_columns=["x"],
|
||||
... output_columns=["x", "y"],
|
||||
... column_order=["x", "y"])
|
||||
... output_columns=["x", "y"])
|
||||
>>> # Data after
|
||||
>>> # | x | y |
|
||||
>>> # +---------+---------+
|
||||
|
@ -957,8 +956,7 @@ class Unique(TensorOperation):
|
|||
>>> dataset = ds.NumpySlicesDataset(data, ["x"])
|
||||
>>> dataset = dataset.map(operations=transforms.Unique(),
|
||||
... input_columns=["x"],
|
||||
... output_columns=["x", "y", "z"],
|
||||
... column_order=["x", "y", "z"])
|
||||
... output_columns=["x", "y", "z"])
|
||||
>>> # Data after
|
||||
>>> # | x | y |z |
|
||||
>>> # +---------+-----------------+---------+
|
||||
|
|
|
@ -312,8 +312,7 @@ class BoundingBoxAugment(ImageTensorOperation):
|
|||
>>> # map to apply ops
|
||||
>>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
|
||||
... input_columns=["image", "bbox"],
|
||||
... output_columns=["image", "bbox"],
|
||||
... column_order=["image", "bbox"])
|
||||
... output_columns=["image", "bbox"])
|
||||
"""
|
||||
|
||||
@deprecated_c_vision()
|
||||
|
@ -2455,7 +2454,7 @@ class SlicePatches(ImageTensorOperation):
|
|||
>>> cols = ['img' + str(x) for x in range(num_h*num_w)]
|
||||
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
|
||||
... input_columns=["image"],
|
||||
... output_columns=cols, column_order=cols)
|
||||
... output_columns=cols)
|
||||
"""
|
||||
|
||||
@deprecated_c_vision()
|
||||
|
|
|
@ -581,8 +581,7 @@ class BoundingBoxAugment(ImageTensorOperation):
|
|||
>>> # map to apply ops
|
||||
>>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
|
||||
... input_columns=["image", "bbox"],
|
||||
... output_columns=["image", "bbox"],
|
||||
... column_order=["image", "bbox"])
|
||||
... output_columns=["image", "bbox"])
|
||||
"""
|
||||
|
||||
@check_bounding_box_augment_cpp
|
||||
|
@ -3834,7 +3833,7 @@ class SlicePatches(ImageTensorOperation):
|
|||
>>> cols = ['img' + str(x) for x in range(num_h*num_w)]
|
||||
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
|
||||
... input_columns=["image"],
|
||||
... output_columns=cols, column_order=cols)
|
||||
... output_columns=cols)
|
||||
"""
|
||||
|
||||
@check_slice_patches
|
||||
|
|
|
@ -1436,7 +1436,6 @@ def _save_dataset_to_mindir(model, dataset):
|
|||
model.preprocessor.op.add()
|
||||
model.preprocessor.op[-1].input_columns = json.dumps(op['input_columns'])
|
||||
model.preprocessor.op[-1].output_columns = json.dumps(op['output_columns'])
|
||||
model.preprocessor.op[-1].project_columns = json.dumps(op['project_columns'])
|
||||
model.preprocessor.op[-1].op_type = json.dumps(op['op_type'])
|
||||
model.preprocessor.op[-1].operations = json.dumps(op['operations'])
|
||||
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
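
For reference, a hedged sketch of the fields each exported preprocessor op carries after this change; the field names come from the assignments above, while the values are illustrative only:

    # Illustrative values only; real entries come from the serialized dataset pipeline.
    op = {
        "input_columns": ["image"],    # assumed example column
        "output_columns": ["image"],   # assumed example column
        "op_type": "Map",              # assumed example op type
        "operations": "[...]",         # JSON description of the transforms
        "offload": False,
    }
    # Note that "project_columns" is no longer part of the exported preprocessor op.
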
|
||||
|
|
|
@ -212,7 +212,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
|
|||
np.array(y).flatten().reshape(batch_size, 39),
|
||||
np.array(z).flatten().reshape(batch_size, 1))),
|
||||
input_columns=['feat_ids', 'feat_vals', 'label'],
|
||||
column_order=['feat_ids', 'feat_vals', 'label'],
|
||||
num_parallel_workers=8)
|
||||
data_set = data_set.repeat(epochs)
|
||||
return data_set
|
||||
|
@ -260,7 +259,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
|
|||
np.array(y).flatten().reshape(batch_size, 39),
|
||||
np.array(z).flatten().reshape(batch_size, 1))),
|
||||
input_columns=['feat_ids', 'feat_vals', 'label'],
|
||||
column_order=['feat_ids', 'feat_vals', 'label'],
|
||||
num_parallel_workers=8)
|
||||
data_set = data_set.repeat(epochs)
|
||||
return data_set
|
||||
|
|
|
@ -60,7 +60,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000,
|
|||
np.array(y).flatten().reshape(batch_size, 39),
|
||||
np.array(z).flatten().reshape(batch_size, 1))),
|
||||
input_columns=['feat_ids', 'feat_vals', 'label'],
|
||||
column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8)
|
||||
num_parallel_workers=8)
|
||||
# if train_mode:
|
||||
data_set = data_set.repeat(epochs)
|
||||
return data_set
|
||||
|
@ -101,7 +101,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
|
|||
np.array(y).flatten().reshape(batch_size, 39),
|
||||
np.array(z).flatten().reshape(batch_size, 1))),
|
||||
input_columns=['feat_ids', 'feat_vals', 'label'],
|
||||
column_order=['feat_ids', 'feat_vals', 'label'],
|
||||
num_parallel_workers=8)
|
||||
data_set = data_set.repeat(epochs)
|
||||
return data_set
|
||||
|
|
|
@ -304,14 +304,13 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
hwc_to_chw = C.HWC2CHW()
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
num_parallel_workers=num_parallel_workers)
ds = ds.project(["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"])
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
num_parallel_workers=num_parallel_workers)
return ds

@ -181,7 +181,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
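The first YOLO hunk covers the trickier case: a map whose operation fans one input column out into several output columns. There, `column_order` used to double as the projection that dropped the untouched `annotation` column; after this change the same effect needs an explicit `project()` right after the `map()`. A hedged, self-contained sketch of that pattern (toy shapes and column names, not the YOLO schema):

import numpy as np
import mindspore.dataset as ds

data = {"image": np.ones((4, 2, 2), dtype=np.float32),
        "annotation": np.zeros((4, 3), dtype=np.float32)}
dataset = ds.NumpySlicesDataset(data, shuffle=False)

# One input column expands into three output columns; "annotation" passes through untouched.
dataset = dataset.map(operations=lambda img: (img, img * 2.0, img * 3.0),
                      input_columns=["image"],
                      output_columns=["image", "bbox_1", "bbox_2"])
# column_order used to select and order the surviving columns here; project() now does it.
dataset = dataset.project(["image", "bbox_1", "bbox_2"])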
@ -71,7 +71,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
EXPECT_NE(decode_op, nullptr);

// Create a Map operation on ds
ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
ds = ds->Map({decode_op}, {}, {}, some_cache);
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset

@ -236,7 +236,8 @@ TEST_F(MindDataTestPipeline, TestAlbumIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row

@ -272,7 +273,8 @@ TEST_F(MindDataTestPipeline, TestAlbumIteratorWrongColumn) {

// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -138,7 +138,8 @@ TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row

@ -174,7 +175,8 @@ TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {

// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -94,8 +94,7 @@ TEST_F(MindDataTestPipeline, TestCMUArcticBasicWithPipeline) {
auto op = transforms::PadEnd({1, 50000});;
std::vector<std::string> input_columns = {"waveform"};
std::vector<std::string> output_columns = {"waveform"};
std::vector<std::string> project_columns = {"transcript", "utterance_id", "waveform"};
ds = ds->Map({op}, input_columns, output_columns, project_columns);
ds = ds->Map({op}, input_columns, output_columns);
EXPECT_NE(ds, nullptr);
ds = ds->Repeat(10);
EXPECT_NE(ds, nullptr);
@ -138,7 +138,8 @@ TEST_F(MindDataTestPipeline, TestDBpediaDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "class" column and drop others
std::vector<std::string> columns = {"class"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row

@ -175,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestDBpediaDatasetIteratorWrongColumn) {

// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -145,7 +145,8 @@ TEST_F(MindDataTestPipeline, TestDIV2KIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"hr_image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row

@ -182,7 +183,8 @@ TEST_F(MindDataTestPipeline, TestDIV2KIteratorWrongColumn) {

// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

@ -385,4 +387,4 @@ TEST_F(MindDataTestPipeline, TestDIV2KWithNullSamplerError) {
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid DIV2K input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
}

@ -237,7 +237,8 @@ TEST_F(MindDataTestPipeline, TestEMnistIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -273,7 +274,8 @@ TEST_F(MindDataTestPipeline, TestEMnistIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -136,7 +136,8 @@ TEST_F(MindDataTestPipeline, TestFakeImageIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -171,7 +172,8 @@ TEST_F(MindDataTestPipeline, TestFakeImageIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -140,7 +140,8 @@ TEST_F(MindDataTestPipeline, TestFashionMnistIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -232,7 +233,8 @@ TEST_F(MindDataTestPipeline, TestFashionMnistIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -136,7 +136,8 @@ TEST_F(MindDataTestPipeline, TestFlickrIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -172,7 +173,8 @@ TEST_F(MindDataTestPipeline, TestFlickrIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
@ -386,4 +388,4 @@ TEST_F(MindDataTestPipeline, TestFlickrWithNullSamplerError) {
|
|||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
// Expect failure: invalid Flickr30k input, sampler cannot be nullptr
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -90,8 +90,7 @@ TEST_F(MindDataTestPipeline, TestGTZANBasicWithPipeline) {
|
|||
auto op = transforms::PadEnd({1, 50000});
|
||||
std::vector<std::string> input_columns = {"waveform"};
|
||||
std::vector<std::string> output_columns = {"waveform"};
|
||||
std::vector<std::string> project_columns = {"label", "waveform", "sample_rate"};
|
||||
ds = ds->Map({op}, input_columns, output_columns, project_columns);
|
||||
ds = ds->Map({op}, input_columns, output_columns);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ds = ds->Repeat(10);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
|
|
@ -209,7 +209,8 @@ TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "text" column and drop others
|
||||
std::vector<std::string> columns = {"text"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -245,7 +246,8 @@ TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
@ -317,4 +319,4 @@ TEST_F(MindDataTestPipeline, TestIMDBWithNullSamplerError) {
|
|||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
// Expect failure: invalid IMDB input, sampler cannot be nullptr
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -80,7 +80,8 @@ TEST_F(MindDataTestPipeline, TestIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -121,7 +122,8 @@ TEST_F(MindDataTestPipeline, TestIteratorReOrder) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Reorder "image" and "label" column
|
||||
std::vector<std::string> columns = {"label", "image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -165,7 +167,8 @@ TEST_F(MindDataTestPipeline, TestIteratorTwoColumns) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" and "bbox" column
|
||||
std::vector<std::string> columns = {"image", "bbox"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -204,7 +207,8 @@ TEST_F(MindDataTestPipeline, TestIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
@ -220,7 +224,7 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpoch) {
|
|||
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(random_data_num_row, schema)->SetNumWorkers(1);
|
||||
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator({}, num_epochs);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(num_epochs);
|
||||
ASSERT_NE(iter, nullptr); // should terminate test case if iterator is null
|
||||
std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
|
||||
|
@ -253,6 +257,6 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpochFail) {
|
|||
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(3, schema)->SetNumWorkers(1);
|
||||
// expect nullptr due to incorrect num_epochs value.
|
||||
EXPECT_EQ(ds->CreateIterator({}, 0), nullptr);
|
||||
EXPECT_EQ(ds->CreateIterator({}, -2), nullptr);
|
||||
EXPECT_EQ(ds->CreateIterator(0), nullptr);
|
||||
EXPECT_EQ(ds->CreateIterator(-2), nullptr);
|
||||
}
|
||||
|
|
|
@ -140,7 +140,8 @@ TEST_F(MindDataTestPipeline, TestKMnistDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -176,7 +177,8 @@ TEST_F(MindDataTestPipeline, TestKMnistDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
@ -248,7 +250,8 @@ TEST_F(MindDataTestPipeline, TestKMnistIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -72,9 +72,7 @@ TEST_F(MindDataTestPipeline, TestLibriTTSBasicWithPipeline) {
|
|||
auto op = transforms::PadEnd({1, 500000});
|
||||
std::vector<std::string> input_columns = {"waveform"};
|
||||
std::vector<std::string> output_columns = {"waveform"};
|
||||
std::vector<std::string> project_columns = {"sample_rate", "original_text", "normalized_text", "speaker_id",
|
||||
"chapter_id", "utterance_id", "waveform"};
|
||||
ds = ds->Map({op}, input_columns, output_columns, project_columns);
|
||||
ds = ds->Map({op}, input_columns, output_columns);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ds = ds->Repeat(5);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
@ -308,4 +306,4 @@ TEST_F(MindDataTestPipeline, TestLibriTTSSequentialSamplers) {
|
|||
EXPECT_EQ(i, 2);
|
||||
|
||||
iter->Stop();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,7 +141,8 @@ TEST_F(MindDataTestPipeline, TestLJSpeechDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "waveform" column and drop others
|
||||
std::vector<std::string> columns = {"waveform"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -175,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestLJSpeechDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -130,7 +130,8 @@ TEST_F(MindDataTestPipeline, TestManifestIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -164,7 +165,8 @@ TEST_F(MindDataTestPipeline, TestManifestIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -1087,7 +1087,7 @@ TEST_F(MindDataTestPipeline, TestProjectMap) {
|
|||
EXPECT_NE(random_vertical_flip_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
|
||||
ds = ds->Map({random_vertical_flip_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create a Project operation on ds
|
||||
|
@ -1139,7 +1139,7 @@ TEST_F(MindDataTestPipeline, TestProjectDuplicateColumnFail) {
|
|||
EXPECT_NE(random_vertical_flip_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
|
||||
ds = ds->Map({random_vertical_flip_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create a Project operation on ds
|
||||
|
@ -1171,7 +1171,7 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
|
|||
EXPECT_NE(random_vertical_flip_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {}, {});
|
||||
auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {});
|
||||
EXPECT_NE(ds1, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -1180,7 +1180,7 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
|
|||
EXPECT_EQ(iter1, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"}, {});
|
||||
auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"});
|
||||
EXPECT_NE(ds2, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -1189,13 +1189,8 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
|
|||
EXPECT_EQ(iter2, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
auto ds3 = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "image"});
|
||||
auto ds3 = ds->Map({random_vertical_flip_op}, {}, {});
|
||||
EXPECT_NE(ds3, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
|
||||
// Expect failure: duplicate Map op project column name
|
||||
EXPECT_EQ(iter3, nullptr);
|
||||
}
|
||||
|
||||
/// Feature: Map op
|
||||
|
@ -1211,7 +1206,7 @@ TEST_F(MindDataTestPipeline, TestMapNullOperation) {
|
|||
|
||||
// Create a Map operation on ds
|
||||
std::shared_ptr<TensorTransform> operation = nullptr;
|
||||
auto ds1 = ds->Map({operation}, {"image"}, {}, {});
|
||||
auto ds1 = ds->Map({operation}, {"image"}, {});
|
||||
EXPECT_NE(ds1, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -1241,13 +1236,13 @@ TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {
|
|||
EXPECT_NE(resize_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
// {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
|
||||
ds = ds->Map({resize_op}, {}, {}, {"image"});
|
||||
ds = ds->Map({resize_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
// This will trigger the creation of the Execution Tree and launch it.
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"image"});
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// iterate over the dataset and get each row
|
||||
|
@ -2470,13 +2465,13 @@ TEST_F(MindDataTestPipeline, TestTFRecordDecodeRepeatResize) {
|
|||
EXPECT_NE(resize_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
// {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
|
||||
ds = ds->Map({decode_op, resize_op}, {}, {}, {"image"});
|
||||
ds = ds->Map({decode_op, resize_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
// This will trigger the creation of the Execution Tree and launch it.
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"image"});
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// iterate over the dataset and get each row
|
||||
|
@ -2756,9 +2751,11 @@ TEST_F(MindDataTestPipeline, Test1to3) {
|
|||
EXPECT_NE(one_to_three_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"}, {"X", "Y", "Z", "label", "A", "B"});
|
||||
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->Project({"X", "Y", "Z", "label", "A", "B"});
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
// This will trigger the creation of the Execution Tree and launch it.
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
|
|
|
@ -176,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestPennTreebankDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "text" column and drop others
|
||||
std::vector<std::string> columns = {"text"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -216,7 +217,8 @@ TEST_F(MindDataTestPipeline, TestPennTreebankDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -174,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestPhotoTourIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -210,7 +211,8 @@ TEST_F(MindDataTestPipeline, TestPhotoTourIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -218,7 +218,8 @@ TEST_F(MindDataTestPipeline, TestPlaces365IteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -255,7 +256,8 @@ TEST_F(MindDataTestPipeline, TestPlaces365IteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -288,7 +288,8 @@ TEST_F(MindDataTestPipeline, TestQMnistIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -324,7 +325,8 @@ TEST_F(MindDataTestPipeline, TestQMnistIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -139,7 +139,8 @@ TEST_F(MindDataTestPipeline, TestSBUIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -173,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestSBUIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -137,7 +137,8 @@ TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -173,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -143,7 +143,8 @@ TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "waveform" column and drop others
|
||||
std::vector<std::string> columns = {"waveform"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -179,7 +180,8 @@ TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -287,7 +287,8 @@ TEST_F(MindDataTestPipeline, TestSTL10DatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -323,7 +324,8 @@ TEST_F(MindDataTestPipeline, TestSTL10DatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -212,7 +212,8 @@ TEST_F(MindDataTestPipeline, TestTedliumDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "waveform" column and drop others
|
||||
std::vector<std::string> columns = {"waveform"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -246,7 +247,8 @@ TEST_F(MindDataTestPipeline, TestTedliumDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic) {
|
|||
EXPECT_NE(random_horizontal_flip_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({decode_op, random_horizontal_flip_op}, {}, {}, {"image"});
|
||||
ds = ds->Map({decode_op, random_horizontal_flip_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create a Batch operation on ds
|
||||
|
@ -109,7 +109,7 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasicGetters) {
|
|||
EXPECT_NE(random_horizontal_flip_op, nullptr);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_horizontal_flip_op}, {}, {}, {"image"});
|
||||
ds = ds->Map({random_horizontal_flip_op}, {}, {});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create a Batch operation on ds
|
||||
|
@ -624,4 +624,4 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic7Row) {
|
|||
|
||||
// Manually terminate the pipeline
|
||||
iter->Stop();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -212,7 +212,8 @@ TEST_F(MindDataTestPipeline, TestUSPSIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "image" column and drop others
|
||||
std::vector<std::string> columns = {"image"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -248,7 +249,8 @@ TEST_F(MindDataTestPipeline, TestUSPSIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -172,7 +172,8 @@ TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "text" column and drop others
|
||||
std::vector<std::string> columns = {"text"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -216,7 +217,8 @@ TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -195,7 +195,8 @@ TEST_F(MindDataTestPipeline, TestYelpReviewDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "text" column and drop others
|
||||
std::vector<std::string> columns = {"text"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -229,7 +230,8 @@ TEST_F(MindDataTestPipeline, TestYelpReviewDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -134,7 +134,8 @@ TEST_F(MindDataTestPipeline, TestYesNoDatasetIteratorOneColumn) {
|
|||
// Create an iterator over the result of the above dataset
|
||||
// Only select "waveform" column and drop others
|
||||
std::vector<std::string> columns = {"waveform"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
@ -170,7 +171,8 @@ TEST_F(MindDataTestPipeline, TestYesNoGetDatasetIteratorWrongColumn) {
|
|||
|
||||
// Pass wrong column name
|
||||
std::vector<std::string> columns = {"digital"};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -980,8 +980,7 @@ TEST_F(MindDataTestPipeline, TestJiebaTokenizerSuccess2) {
|
|||
EXPECT_NE(jieba_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({jieba_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({jieba_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -2804,8 +2803,7 @@ TEST_F(MindDataTestPipeline, TestRegexTokenizerSuccess1) {
|
|||
EXPECT_NE(regex_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({regex_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({regex_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -2939,8 +2937,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeCharTokenizerSuccess1) {
|
|||
EXPECT_NE(unicodechar_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({unicodechar_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({unicodechar_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -3570,8 +3567,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeScriptTokenizerSuccess2) {
|
|||
EXPECT_NE(unicodescript_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -3642,8 +3638,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeScriptTokenizerSuccess3) {
|
|||
EXPECT_NE(unicodescript_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -3766,8 +3761,7 @@ TEST_F(MindDataTestPipeline, TestWhitespaceTokenizerSuccess1) {
|
|||
EXPECT_NE(white_tokenizer, nullptr);
|
||||
|
||||
// Create Map operation on ds
|
||||
ds = ds->Map({white_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
|
||||
{"token", "offsets_start", "offsets_limit"});
|
||||
ds = ds->Map({white_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
|
|
@ -43,7 +43,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess1Shr) {
|
|||
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 1.0);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -86,7 +86,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess2Auto) {
|
|||
auto bound_box_augment_op(new vision::BoundingBoxAugment({random_rotation_op}, 1.0));
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -132,7 +132,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess3Obj) {
|
|||
vision::BoundingBoxAugment bound_box_augment_op = vision::BoundingBoxAugment({random_rotation_op}, 1.0);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -176,7 +176,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail1) {
|
|||
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, -1.0);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -204,7 +204,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail2) {
|
|||
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 2.0);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -229,7 +229,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail3) {
|
|||
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(nullptr, 0.5);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -258,7 +258,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail4) {
|
|||
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 0.25);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
|
|
@ -252,7 +252,7 @@ TEST_F(MindDataTestPipeline, TestResizeWithBBoxSuccess) {
|
|||
// Note: No need to check for output after calling API class constructor
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({resize_with_bbox_op, resize_with_bbox_op1}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({resize_with_bbox_op, resize_with_bbox_op1}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
|
|
@ -548,7 +548,7 @@ TEST_F(MindDataTestPipeline, TestRandomCropWithBboxSuccess) {
|
|||
std::vector<int32_t>{128, 128});
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_crop}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({random_crop}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -688,7 +688,7 @@ TEST_F(MindDataTestPipeline, TestRandomHorizontalFlipWithBBoxSuccess) {
|
|||
std::make_shared<vision::RandomHorizontalFlipWithBBox>(0.5);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_horizontal_flip_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({random_horizontal_flip_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -1767,7 +1767,7 @@ TEST_F(MindDataTestPipeline, TestRandomVerticalFlipWithBBoxSuccess) {
|
|||
std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlipWithBBox>(0.4);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({random_vertical_flip_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({random_vertical_flip_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
|
|
@ -72,7 +72,7 @@ TEST_F(MindDataTestSlicePatches, TestSlicePatchesPipeline) {
|
|||
auto slice_patches = std::make_shared<vision::SlicePatches>(2, 2);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({slice_patches}, {"image"}, {"img0", "img1", "img2", "img3"}, {"img0", "img1", "img2", "img3"});
|
||||
ds = ds->Map({slice_patches}, {"image"}, {"img0", "img1", "img2", "img3"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create a Batch operation on ds
|
||||
|
@ -82,7 +82,8 @@ TEST_F(MindDataTestSlicePatches, TestSlicePatchesPipeline) {
|
|||
|
||||
// Create an iterator over the result of the above dataset
|
||||
// This will trigger the creation of the Execution Tree and launch it.
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"img0", "img1", "img2", "img3"});
|
||||
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
// Iterate the dataset and get each row
|
||||
|
|
|
@ -205,7 +205,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail1num_ops) {
|
|||
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, center_crop_op}, 0);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -234,7 +234,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail2num_ops) {
|
|||
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, center_crop_op}, 3);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -263,7 +263,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail3transforms) {
|
|||
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op}, 1);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -291,7 +291,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail4transforms) {
|
|||
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, nullptr}, 2);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
@ -316,7 +316,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail5transforms) {
|
|||
auto uniform_aug_op = std::make_shared<vision::UniformAugment>(list, 1);
|
||||
|
||||
// Create a Map operation on ds
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
|
||||
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Create an iterator over the result of the above dataset
|
||||
|
|
|
@ -323,14 +323,14 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) {
|
|||
ASSERT_NE(ds, nullptr);
|
||||
ds->SetNumWorkers(1);
|
||||
// config mapOp
|
||||
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {}, {},
|
||||
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {},
|
||||
nullptr, {tst_cb});
|
||||
ds->SetNumWorkers(1);
|
||||
ASSERT_NE(ds, nullptr);
|
||||
ds = ds->Repeat(2);
|
||||
ASSERT_NE(ds, nullptr);
|
||||
int32_t num_epochs = 2;
|
||||
auto itr = ds->CreateIterator({}, num_epochs);
|
||||
auto itr = ds->CreateIterator(num_epochs);
|
||||
for (int ep_num = 0; ep_num < num_epochs; ++ep_num) {
|
||||
std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
ASSERT_OK(itr->GetNextRow(&row));
|
||||
|
@ -365,7 +365,7 @@ TEST_F(MindDataTestCallback, TestCAPICallback) {
|
|||
ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt32, {}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(44, schema);
|
||||
ASSERT_NE(ds, nullptr);
|
||||
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {}, {},
|
||||
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {},
|
||||
nullptr, {cb1});
|
||||
ASSERT_NE(ds, nullptr);
|
||||
ds = ds->Repeat(2);
|
||||
|
|
|
@ -123,12 +123,14 @@ TEST_F(MindDataTestTreeAdapter, TestProjectMapTreeAdapter) {
|
|||
EXPECT_NE(one_hot, nullptr);
|
||||
|
||||
// Create a Map operation, this will automatically add a project after map
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"label"});
|
||||
|
||||
auto tree_adapter = std::make_shared<TreeAdapter>();
|
||||
|
||||
Status rc = tree_adapter->Compile(ds->IRNode(), 2);
|
||||
Status rc = tree_adapter->Compile(project_ds->IRNode(), 2);
|
||||
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
|
|
|
@ -53,9 +53,11 @@ class MindDataTestProfiler : public UT::DatasetOpTesting {
|
|||
EXPECT_NE(one_hot, nullptr);
|
||||
|
||||
// Create a Map operation, this will automatically add a project after map
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->Project({"label"});
|
||||
|
||||
ds = ds->Take(op_input);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
|
@ -98,7 +100,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManager1) {
|
|||
EXPECT_NE(one_hot, nullptr);
|
||||
|
||||
// Create a Map operation, this will automatically add a project after map
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
|
||||
ds = ds->Map({one_hot}, {"label"}, {"label"});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->Take(4);
|
||||
|
@ -107,7 +109,6 @@ TEST_F(MindDataTestProfiler, TestProfilerManager1) {
|
|||
ds = ds->Batch(2, true);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// No columns are specified, use all columns
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
|
@ -160,7 +161,6 @@ TEST_F(MindDataTestProfiler, TestProfilerManager2) {
|
|||
ds = ds->Batch(2, false);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// No columns are specified, use all columns
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
|
@ -202,9 +202,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByEpoch) {
|
|||
|
||||
std::shared_ptr<Dataset> ds = set_dataset(20);
|
||||
|
||||
// No columns are specified, use all columns
|
||||
std::vector<std::string> columns = {};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(3);
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
std::vector<uint8_t> cpu_result;
|
||||
|
@ -287,9 +285,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByStep) {
|
|||
|
||||
std::shared_ptr<Dataset> ds = set_dataset(20);
|
||||
|
||||
// No columns are specified, use all columns
|
||||
std::vector<std::string> columns = {};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(3);
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
std::vector<uint8_t> cpu_result;
|
||||
|
@ -381,9 +377,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByTime) {
|
|||
|
||||
std::shared_ptr<Dataset> ds = set_dataset(20);
|
||||
|
||||
// No columns are specified, use all columns
|
||||
std::vector<std::string> columns = {};
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 5);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator(5);
|
||||
EXPECT_NE(iter, nullptr);
|
||||
|
||||
std::vector<uint8_t> cpu_result;
|
||||
|
|
|
@@ -52,6 +52,7 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "bounding_box_augment_rotation_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -84,6 +85,7 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "bounding_box_augment_crop_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -116,6 +118,7 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "bounding_box_augment_valid_ratio_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -170,6 +173,7 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
data_voc1 = helper_perform_ops_bbox_edgecase_float(data_voc1)
data_voc2 = helper_perform_ops_bbox_edgecase_float(data_voc2)
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "bounding_box_augment_valid_edge_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
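Every hunk in this file follows the same migration: the map keeps its transform and loses the ordering argument, and an explicit project is appended when only a subset of columns should continue down the pipeline. A minimal sketch of the pattern, assuming an installed mindspore (the column names and values below are invented for illustration, not taken from the tests):

import numpy as np
import mindspore.dataset as ds

# Three columns; the map only touches "x" and leaves "y" and "z" alone.
data = ds.NumpySlicesDataset((np.arange(4), np.arange(4), np.arange(4)),
                             column_names=["x", "y", "z"], shuffle=False)
data = data.map(operations=(lambda v: v * 2), input_columns=["x"])
# column_order used to drop "z" and fix the order inside map(); that is now a separate step.
data = data.project(["x", "y"])
for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    assert sorted(row.keys()) == ["x", "y"]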
@@ -124,8 +124,7 @@ def test_lambdas():
def test_config(arr, input_columns, output_cols, op_list):
data = ds.NumpySlicesDataset(
arr, column_names=input_columns, shuffle=False)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols,
column_order=output_cols)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols)
res = []
for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
for col_name in output_cols:

@@ -159,8 +158,7 @@ def test_c_py_compose_transforms_module():
def test_config(arr, input_columns, output_cols, op_list):
data = ds.NumpySlicesDataset(
arr, column_names=input_columns, shuffle=False)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols,
column_order=output_cols)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols)
res = []
for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
for col_name in output_cols:

@@ -94,8 +94,8 @@ def test_concatenate_op_multi_input_string():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)

data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], output_columns=["out1"])
data = data.project(["out1"])
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"])
for data_row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

@@ -114,8 +114,8 @@ def test_concatenate_op_multi_input_numeric():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)

data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], output_columns=["out1"])
data = data.project(["out1"])
expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)
@@ -293,7 +293,8 @@ def test_generator_8():
data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
num_parallel_workers=2)
data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
num_parallel_workers=2, column_order=["out0", "out1", "out2"])
num_parallel_workers=2)
data1 = data1.project(["out0", "out1", "out2"])
data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
num_parallel_workers=2)

@@ -325,8 +326,6 @@ def test_generator_9():
num_parallel_workers=4)

# Expected column order is not changed.
# data1 = data[0] is "image" and data[1] is "label"
# data2 = data[0] is "label" and data[1] is "image"
i = 0
for data1, data2 in zip(data1, data2):  # each data is a dictionary
golden = np.array([i])

@@ -352,7 +351,8 @@ def test_generator_10():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
num_parallel_workers=2)
data1 = data1.project(['col0', 'out1', 'out2'])

# Expected column order is |col0|out1|out2|
i = 0

@@ -369,21 +369,21 @@ def test_generator_10():
def test_generator_11():
"""
Feature: GeneratorDataset
Description: Test map column order len(input_columns) != len(output_columns), column_order drops some columns
Description: Test .project drops some columns
Expectation: The dataset is processed as expected
"""
logger.info("Test map column order when len(input_columns) != len(output_columns), "
"and column_order drops some columns.")
logger.info("Test .project drops some columns.")

# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['out1', 'out2'], num_parallel_workers=2)
num_parallel_workers=2)
data1 = data1.project(["out1", "out2"])

# Expected column order is |out1|out2|
i = 0
for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item[0], golden)

@@ -415,7 +415,8 @@ def test_generator_12():
i = i + 1

data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x * 5)), column_order=["col1", "col0"], num_parallel_workers=2)
data1 = data1.map(operations=(lambda x: (x * 5)), num_parallel_workers=2)
data1 = data1.project(["col1", "col0"])

# Expected column order is |col0|col1|
i = 0

@@ -451,7 +452,7 @@ def test_generator_13():
i = i + 1

for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([i * 5])
np.testing.assert_array_equal(item["out0"], golden)

@@ -587,7 +588,7 @@ def test_generator_18():
i = i + 1

for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([i * 5])
np.testing.assert_array_equal(item["out0"], golden)

@@ -601,7 +602,7 @@ def test_generator_19():
Description: Test multiprocessing 2 different large columns
Expectation: The dataset is processed as expected
"""
logger.info("Test map column order when input_columns is None.")
logger.info("Test map multiprocessing 2 different large columns.")

# apply dataset operations
data1 = ds.GeneratorDataset(DatasetGeneratorLarge(), ["col0", "col1"], python_multiprocessing=True, shuffle=False)

@@ -713,24 +714,6 @@ def test_generator_error_2():
assert "Data type of 1th item of the input or its converted Numpy array is expected" in str(info.value)


def test_generator_error_3():
"""
Feature: GeneratorDataset
Description: Test GeneratorDataset when len(input_columns) != len(output_columns) and column_order is not specified
Expectation: Error is raised as expected
"""
with pytest.raises(ValueError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
num_parallel_workers=2)

for _ in data1:
pass
assert "When length of input_columns and output_columns are not equal, column_order must be specified." in \
str(info.value)


def test_generator_error_4():
"""
Feature: GeneratorDataset
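The deleted test_generator_error_3 covered a rule that no longer exists: a map whose output column count differs from its input count used to demand column_order. A hedged sketch of the same one-to-two-column map under the new signature, assuming an installed mindspore (the generator and column names are invented for illustration); the explicit project keeps only the new columns:

import numpy as np
import mindspore.dataset as ds

def two_col_generator():
    for i in range(4):
        yield (np.array([i]), np.array([i * 10]))

data1 = ds.GeneratorDataset(two_col_generator(), ["col0", "col1"])
# One input column fans out into two output columns; column_order is no longer required.
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["col1"], output_columns=["out1", "out2"])
# "col0" is still carried through the pipeline; keep only the new columns explicitly.
data1 = data1.project(["out1", "out2"])
for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
    assert len(item) == 2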
@@ -882,7 +882,7 @@ def test_imagefolder_exception():
data = ds.ImageFolderDataset(DATA_DIR)
data = data.map(operations=exception_func2, input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"], num_parallel_workers=1)
num_parallel_workers=1)
for _ in data.__iter__():
pass
assert False

@@ -692,7 +692,7 @@ def test_imdb_exception():
data = ds.IMDBDataset(DATA_DIR)
data = data.map(operations=exception_func2, input_columns=["text", "label"],
output_columns=["text", "label", "label1"],
column_order=["text", "label", "label1"], num_parallel_workers=1)
num_parallel_workers=1)
for _ in data.__iter__():
pass
assert False

@@ -568,7 +568,6 @@ def test_lsun_exception_map():
data = data.map(operations=exception_func2,
input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"],
num_parallel_workers=1)
for _ in data.__iter__():
pass

@@ -448,7 +448,6 @@ def test_omniglot_exception():
data = data.map(operations=exception_func2,
input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"],
num_parallel_workers=1)
for _ in data.__iter__():
pass
@@ -135,8 +135,8 @@ def test_voc_meta_column():
return img, img, label

data3 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"],
column_order=["_meta-filename", "img1", "img2", "label"])
data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"])
data3 = data3.project(["_meta-filename", "img1", "img2", "label"])
data3 = data3.rename("_meta-filename", "filename")
num = 0
for item in data3.create_tuple_iterator(num_epochs=1, output_numpy=True):

@@ -148,8 +148,8 @@ def test_voc_meta_column():
return img

data4 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"],
column_order=["_meta-filename", "img1"])
data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"])
data4 = data4.project(["_meta-filename", "img1"])
data4 = data4.rename("_meta-filename", "filename")
num = 0
for item in data4.create_tuple_iterator(num_epochs=1, output_numpy=True):

@@ -24,8 +24,7 @@ import mindspore.dataset.transforms as ops
def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array)
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
column_order=["x", "y"])
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(array, d["x"])
np.testing.assert_array_equal(array, d["y"])
@@ -159,8 +159,8 @@ def test_get_column_name_map():
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"])
assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"],
column_order=["col2", "col1"])
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"])
data = data.project(["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"]

@@ -35,7 +35,7 @@ def test_magphase_pipeline():
dataset = ds.NumpySlicesDataset(data1, column_names=["col1"], shuffle=False)
magphase_window = audio.Magphase(power=1.0)
dataset = dataset.map(operations=magphase_window, input_columns=["col1"],
output_columns=["mag", "phase"], column_order=["mag", "phase"])
output_columns=["mag", "phase"])
for data1, data2 in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
assert abs(data1[0] - expected[0]) < 0.00001
assert abs(data1[1] - expected[1]) < 0.00001
@@ -342,6 +342,28 @@ def test_python_map_mp_seed_repeatability(set_seed_to=1337, set_num_parallel_wor
ds.config.set_enable_shared_mem(original_enable_shared_mem)


def test_map_with_deprecated_parameter():
"""
Feature: Map op
Description: map with deprecated parameter
Expectation: ValueError
"""
data1 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data2 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data3 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data4 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)

label = [1, 2, 3, 4]

dataset = ds.NumpySlicesDataset(([data1, data2, data3, data4], label), ["data", "label"])
with pytest.raises(ValueError) as info:
dataset = dataset.map(operations=[(lambda x: (x + 1, x / 255))],
input_columns=["data"],
output_columns=["data2", "data3"],
column_order=["data2", "data3"])
assert "The parameter 'column_order' had been deleted in map operation." in str(info.value)


if __name__ == '__main__':
test_map_c_transform_exception()
test_map_py_transform_exception()

@@ -351,3 +373,4 @@ if __name__ == '__main__':
test_c_map_randomness_repeatability_with_shards()
test_python_map_mp_repeatability(num_parallel_workers=4, num_samples=4)
test_python_map_mp_seed_repeatability()
test_map_with_deprecated_parameter()
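The new test above pins down the failure mode for callers that still pass the removed argument. A hedged sketch of the corresponding call-site migration, assuming an installed mindspore (the arrays and column names are placeholders, not taken from the suite):

import numpy as np
import mindspore.dataset as ds

image = np.ones((8, 8, 3), dtype=np.uint8)
dataset = ds.NumpySlicesDataset(([image, image], [0, 1]), ["data", "label"])

# Passing column_order to map() now raises:
#   ValueError: The parameter 'column_order' had been deleted in map operation.
# The transform stays in map(); the ordering/selection half of the old argument moves to project().
dataset = dataset.map(operations=[(lambda x: (x + 1, x / 255))],
                      input_columns=["data"],
                      output_columns=["data2", "data3"])
dataset = dataset.project(["data2", "data3"])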
@@ -133,8 +133,7 @@ def test_offload_multi_column():
dataset = ds.ImageFolderDataset(DATA_DIR)
dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"],
column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset = dataset.map(operations=[C.Decode()], input_columns="image1")
dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image1")
dataset = dataset.map(operations=[C.Decode()], input_columns="image2")

@@ -143,8 +142,7 @@ def test_offload_multi_column():
dataset_offload = ds.ImageFolderDataset(DATA_DIR)
dataset_offload = dataset_offload.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"],
column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image1")
dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image1", offload=True)
dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image2")

@@ -171,7 +169,7 @@ def test_offload_column_mapping():
dataset = ds.ImageFolderDataset(DATA_DIR)
dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"], column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)

@@ -51,7 +51,8 @@ def test_one_hot():
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=depth)
data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"])
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
data1 = data1.project(["label"])

# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)
@@ -33,8 +33,8 @@ def test_map_reorder0():
# Generator -> Map
data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])

data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
column_order=["col1", "out"])
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out")
data0 = data0.project(["col1", "out"])

for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
assert item == [np.array(1), np.array(0)]

@@ -55,11 +55,14 @@ def test_map_reorder1():
# Three map and zip
data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
data0 = data0.map(operations=(lambda x: x), input_columns="a0")
data0 = data0.project(["a2", "a1", "a0"])
data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
data1 = data1.map(operations=(lambda x: x), input_columns="b0")
data1 = data1.project(["b1", "b2", "b0"])
data2 = ds.zip((data0, data1))
data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])
data2 = data2.map(operations=(lambda x: x), input_columns="a0")
data2 = data2.project(["b2", "a2", "b1", "a1", "b0", "a0"])

for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True):
assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]
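test_map_reorder1 is the case where column_order did more than rename: it interleaved columns coming from two zipped pipelines. Under the new API that ordering is requested on the zipped dataset itself; a minimal sketch, assuming an installed mindspore (the generator and column names are invented for illustration):

import numpy as np
import mindspore.dataset as ds

def three_col_generator():
    for i in range(3):
        yield (np.array([i]), np.array([i]), np.array([i]))

left = ds.GeneratorDataset(three_col_generator(), ["a0", "a1", "a2"])
right = ds.GeneratorDataset(three_col_generator(), ["b0", "b1", "b2"])
merged = ds.zip((left, right))
# The interleaved order that used to be requested through map(..., column_order=...)
# is now an ordinary projection over the zipped columns.
merged = merged.project(["b2", "a2", "b1", "a1", "b0", "a0"])
for row in merged.create_tuple_iterator(num_epochs=1, output_numpy=True):
    assert len(row) == 6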
@@ -204,7 +204,7 @@ def test_case_map_project_map_project():
save_and_check_tuple(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)


def test_column_order():
def test_project_operation():
"""
Feature: Project op
Description: Test Project op where the output dict should maintain the insertion order

@@ -228,4 +228,4 @@ def test_column_order():
if __name__ == '__main__':
test_column_order()
test_project_operation()
@@ -58,8 +58,7 @@ def test_case_1():
# apply dataset operations
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"],
column_order=["out0", "out1"])
data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"])

i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary

@@ -84,8 +83,7 @@ def test_case_2():
# apply dataset operations
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out",
column_order=["out"])
data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out")

i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary

@@ -109,7 +107,7 @@ def test_case_3():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"])

i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary

@@ -137,8 +135,7 @@ def test_case_4():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"], num_parallel_workers=4)

i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary

@@ -244,7 +241,6 @@ def test_case_8():
data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
column_order=["out0", "out1", "out2"],
python_multiprocessing=True)

i = 0

@@ -333,7 +329,7 @@ def test_pyfunc_implicit_compose():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

data1 = data1.map(operations=[(lambda x, y: (x, x + y, x + y + 1)), (lambda x, y, z: (x, y, z))], input_columns=col,
output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"])

i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
@@ -598,7 +598,7 @@ def test_random_crop_09_c():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = vision.Decode()

@@ -466,7 +466,7 @@ def test_random_crop_and_resize_07():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_crop_and_resize_op = vision.RandomResizedCrop((256, 512), (2, 2), (1, 3))
decode_op = vision.Decode()

@@ -53,6 +53,7 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "random_resized_crop_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -98,6 +98,7 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "random_crop_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -252,7 +252,7 @@ def test_random_horizontal_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=[
"image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_horizontal_op = vision.RandomHorizontalFlip(1.0)
decode_op = vision.Decode()

@@ -95,6 +95,7 @@ def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "random_horizontal_flip_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -92,7 +92,7 @@ def test_random_resize_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
resize_op = vision.RandomResize(10)
decode_op = vision.Decode()

@@ -49,6 +49,7 @@ def test_random_resize_with_bbox_op_voc_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "random_resize_with_bbox_op_01_c_voc_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -82,6 +83,7 @@ def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False):
# map to apply ops

data_coco2 = helper_perform_ops_bbox(data_coco2, test_op)
data_coco2 = data_coco2.project(["image", "bbox"])

filename = "random_resize_with_bbox_op_01_c_coco_result.npz"
save_and_check_md5(data_coco2, filename, generate_golden=GENERATE_GOLDEN)
@@ -232,7 +232,7 @@ def test_random_vertical_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_vertical_op = vision.RandomVerticalFlip(1.0)
decode_op = vision.Decode()

@@ -95,6 +95,7 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "random_vertical_flip_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -48,6 +48,7 @@ def test_resize_with_bbox_op_voc_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])

filename = "resize_with_bbox_op_01_c_voc_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

@@ -75,6 +76,7 @@ def test_resize_with_bbox_op_coco_c(plot_vis=False):
# map to apply ops

data_coco2 = helper_perform_ops_bbox(data_coco2, test_op)
data_coco2 = data_coco2.project(["image", "bbox"])

filename = "resize_with_bbox_op_01_c_coco_result.npz"
save_and_check_md5(data_coco2, filename, generate_golden=GENERATE_GOLDEN)
@@ -93,7 +93,7 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal
dataset1 = dataset1.map(operations=decode_op, input_columns=["image"])
dataset1 = dataset1.map(operations=resize_op, input_columns=["image"])
dataset1 = dataset1.map(operations=slice_patches_op,
input_columns=["image"], output_columns=cols, column_order=cols)
input_columns=["image"], output_columns=cols)
# Second dataset
dataset2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
dataset2 = dataset2.map(operations=decode_op, input_columns=["image"])

@@ -101,7 +101,7 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal
func_slice_patches = functools.partial(
slice_patches, num_h=num_h, num_w=num_w, pad_or_drop=pad_or_drop, fill_value=fill_value)
dataset2 = dataset2.map(operations=func_slice_patches,
input_columns=["image"], output_columns=cols, column_order=cols)
input_columns=["image"], output_columns=cols)

num_iter = 0
patches_c = []

@@ -186,7 +186,6 @@ def test_slice_patches_08():
dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
slice_patches_op = vision.SlicePatches(2, 2)
dataset = dataset.map(input_columns=["image"], output_columns=["img0", "img1", "img2", "img3"],
column_order=["img0", "img1", "img2", "img3"],
operations=slice_patches_op)
for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
patch_shape = item['img0'].shape

@@ -226,7 +225,7 @@ def skip_test_slice_patches_11():
slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP)
cols = ['img' + str(x) for x in range(10*13)]
dataset = dataset.map(input_columns=["image"], output_columns=cols,
column_order=cols, operations=slice_patches_op)
operations=slice_patches_op)
for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
patch_shape = item['img0'].shape
assert patch_shape == (700, 538, 256)
@@ -44,8 +44,7 @@ def test_spectral_centroid_pipeline():
wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
out = audio.SpectralCentroid(sample_rate=44100, n_fft=8)
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["SpectralCentroid"],
column_order=['SpectralCentroid'])
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["SpectralCentroid"])
result = np.array([[[4436.1182, 3580.0718, 2902.4917, 3334.8962, 5199.8350, 6284.4814,
3580.0718, 2895.5659]]])
for data1 in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):

@@ -45,8 +45,7 @@ def test_spectrogram_pipeline():
wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
out = audio.Spectrogram(n_fft=8)
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["Spectrogram"],
column_order=['Spectrogram'])
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["Spectrogram"])
result = np.array([[[2.8015e+01, 1.2100e+02, 3.1354e+02, 1.6900e+02, 2.5000e+01,
1.0843e+01, 1.2100e+02, 3.3150e+02],
[3.2145e+00, 3.3914e+01, 9.4728e+01, 4.5914e+01, 9.9142e+00,
@@ -371,8 +371,7 @@ def test_process_string_pipeline():
data = np.array([["apple"], ["orange"], ["banana"], ["1"], ["2"], ["3"], ["a"], ["b"], ["c"]], dtype=dtype)
dataset = ds.NumpySlicesDataset(data, column_names=["text"])
assert dataset.output_types()[0].type == dtype
dataset = dataset.map(lambda e: (e, e), input_columns=["text"], output_columns=["text1", "text2"],
column_order=["text1", "text2"])
dataset = dataset.map(lambda e: (e, e), input_columns=["text"], output_columns=["text1", "text2"])
for i, item in enumerate(dataset.create_dict_iterator(num_epochs=1, output_numpy=True)):
item["text1"] = data[i]
item["text2"] = data[i]
@@ -106,8 +106,7 @@ def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_of
with_offsets=True)

dataset = dataset.map(operations=basic_tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']

@@ -214,8 +214,7 @@ def check_bert_tokenizer_with_offsets(first, last, expect_str,
unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace,
normalization_form=normalization_form, preserve_unused_token=preserve_unused_token, with_offsets=True)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']
@@ -282,7 +282,6 @@ def test_jieba_with_offsets_1():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]

@@ -308,7 +307,6 @@ def test_jieba_with_offsets_1_1():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧']
expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45]

@@ -333,7 +331,6 @@ def test_jieba_with_offsets_1_2():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]

@@ -361,7 +358,6 @@ def test_jieba_with_offsets_2():
expect = ['男默女泪', '市', '长江大桥']
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expected_offsets_start = [0, 12, 15]
expected_offsets_limit = [12, 15, 27]

@@ -387,7 +383,6 @@ def test_jieba_with_offsets_2_1():
jieba_op.add_word("男默女泪", 10)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expect = ['男默女泪', '市', '长江大桥']
expected_offsets_start = [0, 12, 15]

@@ -414,7 +409,6 @@ def test_jieba_with_offsets_2_2():
jieba_op.add_word("江大桥", 20000)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式']
expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51]

@@ -444,7 +438,6 @@ def test_jieba_with_offsets_3():
jieba_op.add_dict(user_dict)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['男默女泪', '市', '长江大桥']
expected_offsets_start = [0, 12, 15]

@@ -475,7 +468,6 @@ def test_jieba_with_offsets_3_1():
jieba_op.add_dict(user_dict)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['男默女泪', '市长', '江大桥']
expected_offsets_start = [0, 12, 18]

@@ -504,7 +496,6 @@ def test_jieba_with_offsets_4():
jieba_op.add_dict(dict_file)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]

@@ -532,7 +523,6 @@ def test_jieba_with_offsets_5():
jieba_op.add_word("江大桥", 20000)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式']
expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51]
@@ -64,8 +64,7 @@ def test_unicode_char_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeCharTokenizer(with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
[0, 3, 6, 9, 12, 15], [0, 3, 6, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1]]

@@ -116,8 +115,7 @@ def test_whitespace_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.WhitespaceTokenizer(with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 8, 11], [0], [0], [0]]
expected_offsets_limit = [[7, 10, 19], [18], [17], [0]]

@@ -189,8 +187,7 @@ def test_unicode_script_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False, with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]]
expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]]

@@ -218,8 +215,7 @@ def test_unicode_script_tokenizer_with_offsets2():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]]
expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]]

@@ -370,8 +366,7 @@ def test_regex_tokenizer_with_offsets():
dataset = dataset.take(last - first + 1)
tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
out_text = []
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -127,8 +127,7 @@ def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_off
tokenizer_op = text.WordpieceTokenizer(vocab=vocab, with_offsets=True, unknown_token=unknown_token,
max_bytes_per_token=max_bytes_per_token)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']

@@ -24,8 +24,7 @@ import mindspore.dataset.transforms as ops
def compare(array, res, idx, cnt):
data = ds.NumpySlicesDataset([array], column_names="x")
data = data.batch(2)
data = data.map(operations=ops.Unique(), input_columns=["x"], output_columns=["x", "y", "z"],
column_order=["x", "y", "z"])
data = data.map(operations=ops.Unique(), input_columns=["x"], output_columns=["x", "y", "z"])
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(res, d["x"])
np.testing.assert_array_equal(idx, d["y"])
@@ -429,20 +429,16 @@ def helper_perform_ops_bbox(data, test_op=None, edge_case=False):
operations=[lambda img, bboxes: (
img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op],
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
return data.map(
operations=[lambda img, bboxes: (
img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))],
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])

if test_op:
return data.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=[
"image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])

return data

@@ -456,8 +452,7 @@ def helper_perform_ops_bbox_edgecase_float(data):
return data.map(operations=lambda img, bbox: (img, np.array(
[[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])


def helper_test_visual_bbox(plot_vis, data1, data2):

@@ -579,12 +574,10 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error):
# map to use selected invalid bounding box type
data = data.map(operations=lambda img, bboxes: add_bad_bbox(img, bboxes, invalid_bbox_type),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
# map to apply ops
data = data.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])  # Add column for "bbox"
output_columns=["image", "bbox"])
for _, _ in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
break
except RuntimeError as error:
Some files were not shown because too many files have changed in this diff.