del: column_order in map

This commit is contained in:
jonyguo 2022-09-26 14:44:58 +08:00
parent a60615e409
commit 911cbd7f6a
102 changed files with 380 additions and 509 deletions

View File

@ -26,7 +26,6 @@ mindspore.dataset.Dataset.map
- **operations** (Union[list[TensorOperation], list[functions]]) - A list of data augmentation operations, which can be dataset augmentation operators or user-defined Python callable objects. The map operation applies them to the dataset object in order.
- **input_columns** (Union[str, list[str]], optional) - Input data columns of the first augmentation operation. The length of this list must match the number of input columns expected by the first operation. Default: None, all data columns are passed to the first augmentation operation.
- **output_columns** (Union[str, list[str]], optional) - Output data columns of the last augmentation operation. This parameter must be specified if the length of `input_columns` is not equal to the length of `output_columns`. The length of this list must match the number of output columns of the last operation. Default: None, the output columns will have the same names as the input columns.
- **column_order** (Union[str, list[str]], optional) - Specifies the order of the data columns passed to the next dataset operation. This parameter must be specified if the length of `input_columns` is not equal to the length of `output_columns`. Note: the column names here are not limited to those specified in `input_columns` and `output_columns`; they can also be unprocessed columns output by the previous operation. Default: None, keep the original column order.
- **num_parallel_workers** (int, optional) - Number of processes/threads used to run the map operation in parallel to speed up processing. Default: None, the value set by `set_num_parallel_workers` will be used.
- **\*\*kwargs** - Other parameters.
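For migration, column ordering that was previously expressed through `column_order` is now done with a separate `.project` operation after `map`. A minimal sketch of the old and new usage (the operations and column names are illustrative, not from this commit):

import mindspore.dataset as ds

dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])

# Old usage (removed): map() accepted a column_order argument.
# dataset = dataset.map(operations=[lambda x, y: (x + y, x - y)],
#                       input_columns=["x", "y"],
#                       output_columns=["sum", "diff"],
#                       column_order=["diff", "sum"])

# New usage: map() only transforms/renames columns; column selection and
# ordering are handled by a follow-up project() operation.
dataset = dataset.map(operations=[lambda x, y: (x + y, x - y)],
                      input_columns=["x", "y"],
                      output_columns=["sum", "diff"])
dataset = dataset.project(["diff", "sum"])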

View File

@ -168,18 +168,11 @@ TensorRow FuncPtrConverter(const std::function<MSTensorVec(MSTensorVec)> &func,
}
// Function to create the iterator, which will build and launch the execution tree.
std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(const std::vector<std::vector<char>> &columns,
int32_t num_epochs) {
std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(int32_t num_epochs) {
std::shared_ptr<Iterator> iter;
try {
auto ds = shared_from_this();
// The specified columns will be selected from the dataset and passed down the pipeline
// in the order specified, other columns will be discarded.
if (!VectorCharToString(columns).empty()) {
ds = ds->Project(VectorCharToString(columns));
}
iter = std::make_shared<Iterator>();
Status rc = iter->BuildAndLaunchTree(ds, num_epochs);
if (rc.IsError()) {
@ -195,15 +188,8 @@ std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(const std::vector<std::v
}
// Function to create the iterator, which will build and launch the execution tree.
std::shared_ptr<PullIterator> Dataset::CreatePullBasedIterator(const std::vector<std::vector<char>> &columns) {
// The specified columns will be selected from the dataset and passed down the pipeline
// in the order specified, other columns will be discarded.
// This code is not in a try/catch block because there is no execution tree class that will be created.
std::shared_ptr<PullIterator> Dataset::CreatePullBasedIterator() {
auto ds = shared_from_this();
if (!VectorCharToString(columns).empty()) {
ds = ds->Project(VectorCharToString(columns));
}
std::shared_ptr<PullIterator> iter = std::make_shared<PullIterator>();
Status rc = iter->BuildAndLaunchTree(ds, 1);
if (rc.IsError()) {
@ -498,16 +484,13 @@ FilterDataset::FilterDataset(const std::shared_ptr<Dataset> &input,
MapDataset::MapDataset(const std::shared_ptr<Dataset> &input,
const std::vector<std::shared_ptr<TensorOperation>> &operations,
const std::vector<std::vector<char>> &input_columns,
const std::vector<std::vector<char>> &output_columns,
const std::vector<std::vector<char>> &project_columns,
const std::shared_ptr<DatasetCache> &cache,
const std::vector<std::vector<char>> &output_columns, const std::shared_ptr<DatasetCache> &cache,
const std::vector<std::shared_ptr<DSCallback>> &callbacks) {
if (input == nullptr) {
ir_node_ = nullptr;
} else {
auto ds = std::make_shared<MapNode>(input->IRNode(), operations, VectorCharToString(input_columns),
VectorCharToString(output_columns), VectorCharToString(project_columns), cache,
callbacks);
VectorCharToString(output_columns), cache, callbacks);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}

View File

@ -191,19 +191,19 @@ PYBIND_REGISTER(FilterNode, 2, ([](const py::module *m) {
PYBIND_REGISTER(MapNode, 2, ([](const py::module *m) {
(void)py::class_<MapNode, DatasetNode, std::shared_ptr<MapNode>>(*m, "MapNode", "to create a MapNode")
.def(py::init(
[](const std::shared_ptr<DatasetNode> &self, const py::list &operations,
const py::list &input_columns, const py::list &output_columns, const py::list &project_columns,
std::vector<std::shared_ptr<PyDSCallback>> py_callbacks, int64_t max_rowsize,
const ManualOffloadMode offload, std::shared_ptr<PythonMultiprocessingRuntime> python_mp) {
auto map = std::make_shared<MapNode>(
self, std::move(toTensorOperations(operations)), toStringVector(input_columns),
toStringVector(output_columns), toStringVector(project_columns), nullptr,
std::vector<std::shared_ptr<DSCallback>>(py_callbacks.begin(), py_callbacks.end()), offload,
python_mp);
THROW_IF_ERROR(map->ValidateParams());
return map;
}));
.def(py::init([](const std::shared_ptr<DatasetNode> &self, const py::list &operations,
const py::list &input_columns, const py::list &output_columns,
std::vector<std::shared_ptr<PyDSCallback>> &py_callbacks, int64_t max_rowsize,
const ManualOffloadMode &offload,
std::shared_ptr<PythonMultiprocessingRuntime> &python_mp) {
auto map = std::make_shared<MapNode>(
self, std::move(toTensorOperations(operations)), toStringVector(input_columns),
toStringVector(output_columns), nullptr,
std::vector<std::shared_ptr<DSCallback>>(py_callbacks.begin(), py_callbacks.end()), offload,
python_mp);
THROW_IF_ERROR(map->ValidateParams());
return map;
}));
}));
PYBIND_REGISTER(PythonMultiprocessingRuntime, 1, ([](const py::module *m) {

View File

@ -34,13 +34,11 @@ namespace dataset {
MapNode::MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr<TensorOperation>> operations,
std::vector<std::string> input_columns, std::vector<std::string> output_columns,
const std::vector<std::string> &project_columns, std::shared_ptr<DatasetCache> cache,
std::vector<std::shared_ptr<DSCallback>> callbacks, ManualOffloadMode offload,
std::shared_ptr<PythonMultiprocessingRuntime> python_mp)
std::shared_ptr<DatasetCache> cache, std::vector<std::shared_ptr<DSCallback>> callbacks,
ManualOffloadMode offload, std::shared_ptr<PythonMultiprocessingRuntime> python_mp)
: operations_(operations),
input_columns_(input_columns),
output_columns_(output_columns),
project_columns_(project_columns),
DatasetNode(std::move(cache)),
callbacks_(callbacks),
offload_(offload),
@ -50,8 +48,8 @@ MapNode::MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr
std::shared_ptr<DatasetNode> MapNode::Copy() {
std::vector<std::shared_ptr<TensorOperation>> operations = operations_;
auto node = std::make_shared<MapNode>(nullptr, operations, input_columns_, output_columns_, project_columns_, cache_,
callbacks_, offload_, python_mp_);
auto node = std::make_shared<MapNode>(nullptr, operations, input_columns_, output_columns_, cache_, callbacks_,
offload_, python_mp_);
(void)node->SetNumWorkers(num_workers_);
(void)node->SetConnectorQueueSize(connector_que_size_);
return node;
@ -59,7 +57,7 @@ std::shared_ptr<DatasetNode> MapNode::Copy() {
void MapNode::Print(std::ostream &out) const {
out << (Name() + "(<ops>" + ",input:" + PrintColumns(input_columns_) + ",output:" + PrintColumns(output_columns_) +
",<project_cols>" + ",num_tensor_ops:")
",num_tensor_ops:")
<< operations_.size() << ",...)";
}
@ -91,12 +89,6 @@ Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
map_op->AddCallbacks(callbacks_);
}
if (!project_columns_.empty()) {
auto project_op = std::make_shared<ProjectOp>(project_columns_);
project_op->SetTotalRepeats(GetTotalRepeats());
project_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(project_op);
}
map_op->SetTotalRepeats(GetTotalRepeats());
map_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
if (python_mp_ != nullptr) {
@ -128,10 +120,6 @@ Status MapNode::ValidateParams() {
RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "output_columns", output_columns_));
}
if (!project_columns_.empty()) {
RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "project_columns", project_columns_));
}
return Status::OK();
}
@ -165,7 +153,6 @@ Status MapNode::to_json(nlohmann::json *out_json) {
args["connector_queue_size"] = connector_que_size_;
args["input_columns"] = input_columns_;
args["output_columns"] = output_columns_;
args["project_columns"] = project_columns_;
if (cache_ != nullptr) {
nlohmann::json cache_args;
RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
@ -202,14 +189,12 @@ Status MapNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode>
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "connector_queue_size", kMapNode));
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "input_columns", kMapNode));
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "output_columns", kMapNode));
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "project_columns", kMapNode));
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "operations", kMapNode));
std::vector<std::string> input_columns = json_obj["input_columns"];
std::vector<std::string> output_columns = json_obj["output_columns"];
std::vector<std::string> project_columns = json_obj["project_columns"];
std::vector<std::shared_ptr<TensorOperation>> operations;
RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(json_obj["operations"], &operations));
*result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns, project_columns);
*result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns);
(void)(*result)->SetNumWorkers(json_obj["num_parallel_workers"]);
(void)(*result)->SetConnectorQueueSize(json_obj["connector_queue_size"]);
return Status::OK();

View File

@ -33,8 +33,7 @@ class MapNode : public DatasetNode {
/// \brief Constructor
MapNode(std::shared_ptr<DatasetNode> child, std::vector<std::shared_ptr<TensorOperation>> operations,
std::vector<std::string> input_columns = {}, std::vector<std::string> output_columns = {},
const std::vector<std::string> &columns = {}, std::shared_ptr<DatasetCache> cache = nullptr,
std::vector<std::shared_ptr<DSCallback>> callbacks = {},
std::shared_ptr<DatasetCache> cache = nullptr, std::vector<std::shared_ptr<DSCallback>> callbacks = {},
ManualOffloadMode offload = ManualOffloadMode::kUnspecified,
std::shared_ptr<PythonMultiprocessingRuntime> python_mp = nullptr);
@ -89,7 +88,6 @@ class MapNode : public DatasetNode {
const auto &TensorOperations() const { return operations_; }
const std::vector<std::string> &InputColumns() const { return input_columns_; }
const std::vector<std::string> &OutputColumns() const { return output_columns_; }
const std::vector<std::string> &ProjectColumns() const { return project_columns_; }
const std::vector<std::shared_ptr<DSCallback>> &Callbacks() const { return callbacks_; }
ManualOffloadMode GetOffload() const { return offload_; }
@ -124,7 +122,6 @@ class MapNode : public DatasetNode {
std::vector<std::shared_ptr<TensorOperation>> operations_;
std::vector<std::string> input_columns_;
std::vector<std::string> output_columns_;
std::vector<std::string> project_columns_;
std::vector<std::shared_ptr<DSCallback>> callbacks_;
/// \brief ManualOffloadMode to indicate manual_offload status

View File

@ -140,7 +140,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);
/// \brief A function to create a PullBasedIterator over the Dataset.
/// \param[in] columns List of columns to be used to specify the order of columns.
/// \return Shared pointer to the Iterator.
/// \par Example
/// \code
@ -149,10 +148,9 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<PullIterator> CreatePullBasedIterator(const std::vector<std::vector<char>> &columns = {});
std::shared_ptr<PullIterator> CreatePullBasedIterator();
/// \brief Function to create an Iterator over the Dataset pipeline.
/// \param[in] columns List of columns to be used to specify the order of columns.
/// \param[in] num_epochs Number of epochs to run through the pipeline (default=-1, which means infinite epochs).
/// An empty row is returned at the end of each epoch.
/// \return Shared pointer to the Iterator.
@ -163,9 +161,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<Iterator> CreateIterator(const std::vector<std::string> &columns = {}, int32_t num_epochs = -1) {
return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs);
}
std::shared_ptr<Iterator> CreateIterator(int32_t num_epochs = -1) { return CreateIteratorCharIF(num_epochs); }
/// \brief Function to transfer data through a device.
/// \note If device is Ascend, features of data will be transferred one by one. The limitation
@ -385,7 +381,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced.
/// \param[in] project_columns A list of column names to project.
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current Dataset.
@ -397,36 +392,24 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
///
/// /* 1) Simple map example */
/// // Apply decode_op on column "image". This column will be replaced by the outputted
/// // column of decode_op. Since column_order is not provided, both columns "image"
/// // and "label" will be propagated to the child node in their original order.
/// // column of decode_op.
/// dataset = dataset->Map({decode_op}, {"image"});
///
/// // Decode and rename column "image" to "decoded_image".
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"});
///
/// // Specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {}, {"label", "image"});
///
/// // Rename column "image" to "decoded_image" and also specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"label", "decoded_image"});
///
/// // Rename column "image" to "decoded_image" and keep only this column.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"decoded_image"});
///
/// /* 2) Map example with more than one operation */
/// // Create a dataset where the images are decoded, then randomly color jittered.
/// // decode_op takes column "image" as input and outputs one column. The column
/// // outputted by decode_op is passed as input to random_jitter_op.
/// // random_jitter_op will output one column. Column "image" will be replaced by
/// // the column outputted by random_jitter_op (the very last operation). All other
/// // columns are unchanged. Since column_order is not specified, the order of the
/// // columns will remain the same.
/// // columns are unchanged.
/// dataset = dataset->Map({decode_op, random_jitter_op}, {"image"})
/// \endcode
std::shared_ptr<MapDataset> Map(const std::vector<TensorTransform *> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
@ -434,8 +417,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
operations.begin(), operations.end(), std::back_inserter(transform_ops),
[](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a MapDataset.
@ -451,14 +433,12 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced.
/// \param[in] project_columns A list of column names to project.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current Dataset.
std::shared_ptr<MapDataset> Map(const std::vector<std::shared_ptr<TensorTransform>> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
@ -467,8 +447,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
return op != nullptr ? op->Parse() : nullptr;
});
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a MapDataset.
@ -484,22 +463,19 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced.
/// \param[in] project_columns A list of column names to project.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current Dataset.
std::shared_ptr<MapDataset> Map(const std::vector<std::reference_wrapper<TensorTransform>> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
(void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops),
[](TensorTransform &op) -> std::shared_ptr<TensorOperation> { return op.Parse(); });
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a Project Dataset.
@ -617,7 +593,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> GetClassIndexingCharIF();
// Char interface(CharIF) of CreateIterator
std::shared_ptr<Iterator> CreateIteratorCharIF(const std::vector<std::vector<char>> &columns, int32_t num_epochs);
std::shared_ptr<Iterator> CreateIteratorCharIF(int32_t num_epochs);
// Char interface(CharIF) of DeviceQueue
bool DeviceQueueCharIF(const std::vector<char> &queue_name, const std::vector<char> &device_type, int32_t device_id,
@ -866,13 +842,11 @@ class DATASET_API MapDataset : public Dataset {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced.
/// \param[in] project_columns A list of column names to project.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
MapDataset(const std::shared_ptr<Dataset> &input, const std::vector<std::shared_ptr<TensorOperation>> &operations,
const std::vector<std::vector<char>> &input_columns, const std::vector<std::vector<char>> &output_columns,
const std::vector<std::vector<char>> &project_columns, const std::shared_ptr<DatasetCache> &cache,
const std::vector<std::shared_ptr<DSCallback>> &callbacks);
const std::shared_ptr<DatasetCache> &cache, const std::vector<std::shared_ptr<DSCallback>> &callbacks);
/// \brief Destructor of MapDataset.
~MapDataset() override = default;

View File

@ -122,7 +122,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);
/// \brief Function to create a PullBasedIterator over the Dataset
/// \param[in] columns List of columns to be used to specify the order of columns
/// \return Shared pointer to the Iterator
/// \par Example
/// \code
@ -131,10 +130,9 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<PullIterator> CreatePullBasedIterator(const std::vector<std::vector<char>> &columns = {});
std::shared_ptr<PullIterator> CreatePullBasedIterator();
/// \brief Function to create an Iterator over the Dataset pipeline
/// \param[in] columns List of columns to be used to specify the order of columns
/// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs.
/// An empty row is returned at the end of each epoch
/// \return Shared pointer to the Iterator
@ -145,9 +143,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<Iterator> CreateIterator(const std::vector<std::string> &columns = {}, int32_t num_epochs = -1) {
return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs);
}
std::shared_ptr<Iterator> CreateIterator(int32_t num_epochs = -1) { return CreateIteratorCharIF(num_epochs); }
/// \brief Function to transfer data through a device.
/// \note If device is Ascend, features of data will be transferred one by one. The limitation
@ -221,7 +217,6 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current MapDataset
@ -233,36 +228,24 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
///
/// /* 1) Simple map example */
/// // Apply decode_op on column "image". This column will be replaced by the outputted
/// // column of decode_op. Since column_order is not provided, both columns "image"
/// // and "label" will be propagated to the child node in their original order.
/// // column of decode_op.
/// dataset = dataset->Map({decode_op}, {"image"});
///
/// // Decode and rename column "image" to "decoded_image".
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"});
///
/// // Specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {}, {"label", "image"});
///
/// // Rename column "image" to "decoded_image" and also specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"label", "decoded_image"});
///
/// // Rename column "image" to "decoded_image" and keep only this column.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"decoded_image"});
///
/// /* 2) Map example with more than one operation */
/// // Create a dataset where the images are decoded, then randomly color jittered.
/// // decode_op takes column "image" as input and outputs one column. The column
/// // outputted by decode_op is passed as input to random_jitter_op.
/// // random_jitter_op will output one column. Column "image" will be replaced by
/// // the column outputted by random_jitter_op (the very last operation). All other
/// // columns are unchanged. Since column_order is not specified, the order of the
/// // columns will remain the same.
/// // columns are unchanged.
/// dataset = dataset->Map({decode_op, random_jitter_op}, {"image"})
/// \endcode
std::shared_ptr<MapDataset> Map(const std::vector<TensorTransform *> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
@ -270,8 +253,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
operations.begin(), operations.end(), std::back_inserter(transform_ops),
[](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a MapDataset
@ -287,14 +269,12 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current MapDataset
std::shared_ptr<MapDataset> Map(const std::vector<std::shared_ptr<TensorTransform>> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
@ -303,8 +283,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
return op != nullptr ? op->Parse() : nullptr;
});
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a MapDataset
@ -320,22 +299,19 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current MapDataset
std::shared_ptr<MapDataset> Map(const std::vector<std::reference_wrapper<TensorTransform>> &operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
const std::vector<std::string> &project_columns = {},
const std::shared_ptr<DatasetCache> &cache = nullptr,
const std::vector<std::shared_ptr<DSCallback>> &callbacks = {}) {
std::vector<std::shared_ptr<TensorOperation>> transform_ops;
(void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops),
[](TensorTransform &op) -> std::shared_ptr<TensorOperation> { return op.Parse(); });
return std::make_shared<MapDataset>(shared_from_this(), transform_ops, VectorStringToChar(input_columns),
VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache,
callbacks);
VectorStringToChar(output_columns), cache, callbacks);
}
/// \brief Function to create a Project Dataset
@ -380,7 +356,7 @@ class DATASET_API Dataset : public std::enable_shared_from_this<Dataset> {
std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> GetClassIndexingCharIF();
// Char interface(CharIF) of CreateIterator
std::shared_ptr<Iterator> CreateIteratorCharIF(const std::vector<std::vector<char>> &columns, int32_t num_epochs);
std::shared_ptr<Iterator> CreateIteratorCharIF(int32_t num_epochs);
// Char interface(CharIF) of DeviceQueue
bool DeviceQueueCharIF(const std::vector<char> &queue_name, const std::vector<char> &device_type, int32_t device_id,
@ -503,8 +479,7 @@ class DATASET_API MapDataset : public Dataset {
public:
MapDataset(const std::shared_ptr<Dataset> &input, const std::vector<std::shared_ptr<TensorOperation>> &operations,
const std::vector<std::vector<char>> &input_columns, const std::vector<std::vector<char>> &output_columns,
const std::vector<std::vector<char>> &project_columns, const std::shared_ptr<DatasetCache> &cache,
const std::vector<std::shared_ptr<DSCallback>> &callbacks);
const std::shared_ptr<DatasetCache> &cache, const std::vector<std::shared_ptr<DSCallback>> &callbacks);
~MapDataset() override = default;
};

View File

@ -839,9 +839,6 @@ class Dataset:
len(output_columns). The size of this list must match the number of output
columns of the last operation. (default=None, output columns will have the same
name as the input columns, i.e., the columns will be replaced).
column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
dataset (default=None). The parameter is required when len(input_column) != len(output_column).
Caution: the list here is not just the columns specified in parameter input_columns and output_columns.
num_parallel_workers (int, optional): Number of threads used to process the dataset in
parallel (default=None, the value from the configuration will be used).
**kwargs:
@ -871,6 +868,8 @@ class Dataset:
Examples:
>>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
>>> # image is of bytes type, which can be decoded to RGB
>>> # label is of type int32
>>>
>>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
>>> decode_op = c_vision.Decode(rgb=True)
@ -879,30 +878,15 @@ class Dataset:
>>>
>>> # 1) Simple map example.
>>>
>>> # Apply decode_op on column "image". This column will be replaced by the outputted
>>> # column of decode_op. Since column_order is not provided, both columns "image"
>>> # and "label" will be propagated to the child node in their original order.
>>> # Apply decode_op on column "image".
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"])
>>>
>>> # Decode and rename column "image" to "decoded_image".
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"], output_columns=["decoded_image"])
>>>
>>> # Specify the order of the output columns.
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
... output_columns=None, column_order=["label", "image"])
>>>
>>> # Rename column "image" to "decoded_image" and also specify the order of the output columns.
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
... output_columns=["decoded_image"], column_order=["label", "decoded_image"])
>>>
>>> # Rename column "image" to "decoded_image" and keep only this column.
>>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
... output_columns=["decoded_image"], column_order=["decoded_image"])
>>>
>>> # A simple example for mapping pyfunc. Renaming columns and specifying column order
>>> # work in the same way as the previous examples.
>>> # A simple example of a user-defined Python function transform.
>>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
>>> dataset = dataset.map(operations=[(lambda x: x + 1)], input_columns=["data"])
>>> dataset = dataset.map(operations=[(lambda x: x - 1)], input_columns=["data"])
>>>
>>> # 2) Map example with more than one operation.
>>>
@ -911,17 +895,14 @@ class Dataset:
>>> # outputted by decode_op is passed as input to random_jitter_op.
>>> # random_jitter_op will output one column. Column "image" will be replaced by
>>> # the column outputted by random_jitter_op (the very last operation). All other
>>> # columns are unchanged. Since column_order is not specified, the order of the
>>> # columns will remain the same.
>>> # columns are unchanged.
>>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"])
>>>
>>> # Rename the column outputted by random_jitter_op to "image_mapped".
>>> # Specifying column order works in the same way as examples in 1).
>>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"],
... output_columns=["image_mapped"])
>>>
>>> # Map with multiple operations using pyfunc. Renaming columns and specifying column order
>>> # work in the same way as examples in 1).
>>> # Map with multiple operations using pyfunc and rename the output column.
>>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
>>> dataset = dataset.map(operations=[(lambda x: x * x), (lambda x: x - 1)], input_columns=["data"],
... output_columns=["data_mapped"])
@ -938,22 +919,9 @@ class Dataset:
>>> operations = [(lambda x, y: (x, x + y, x + y + 1)),
... (lambda x, y, z: x * y * z),
... (lambda x: (x % 2, x % 3, x % 5, x % 7))]
>>>
>>> # Note: Since the number of input columns is not the same as the number of
>>> # output columns, the output_columns and column_order parameters must be
>>> # specified. Otherwise, this map call will also result in an error.
>>>
>>> dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])
>>>
>>> # Propagate all columns to the child node in this order:
>>> dataset = dataset.map(operations, input_columns=["x", "y"],
... output_columns=["mod2", "mod3", "mod5", "mod7"],
... column_order=["mod2", "mod3", "mod5", "mod7"])
>>>
>>> # Propagate some columns to the child node in this order:
>>> dataset = dataset.map(operations, input_columns=["x", "y"],
... output_columns=["mod2", "mod3", "mod5", "mod7"],
... column_order=["mod7", "mod3", "col2"])
... output_columns=["mod2", "mod3", "mod5", "mod7"])
"""
if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True:
num_parallel_workers = 1
@ -962,12 +930,7 @@ class Dataset:
"mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it "
"with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.")
if column_order is not None:
logger.warning("The parameter column_order will be deprecated in the future. "
"Please use '.project' operation instead.")
return MapDataset(self, operations, input_columns, output_columns, column_order, num_parallel_workers,
**kwargs)
return MapDataset(self, operations, input_columns, output_columns, num_parallel_workers, **kwargs)
@check_filter
def filter(self, predicate, input_columns=None, num_parallel_workers=None):
@ -3330,9 +3293,6 @@ class MapDataset(UnionBaseDataset):
The size of the list should match the number of outputs of the last operator
(default=None, output columns will be the input columns, i.e., the columns will
be replaced).
column_order (list[str], optional): Specifies the list of all the columns you need in the whole
dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
is not just the columns specified in parameter input_columns and output_columns.
num_parallel_workers (int, optional): Number of workers to process the dataset
in parallel (default=None).
python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
@ -3343,12 +3303,9 @@ class MapDataset(UnionBaseDataset):
max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
data between processes. This is only used if python_multiprocessing is set to True (default=16).
offload (bool, optional): Flag to indicate whether offload is used (Default=None).
Raises:
ValueError: If len(input_columns) != len(output_columns) and column_order is not specified.
"""
def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None, column_order=None,
def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None, max_rowsize=16,
offload=None):
super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers, cache=cache)
@ -3367,17 +3324,10 @@ class MapDataset(UnionBaseDataset):
self.input_columns = to_list(input_columns)
self.output_columns = to_list(output_columns)
self.column_order = replace_none(column_order, [])
# If output_columns were not provided then use input_columns
self.output_columns = self.input_columns if not self.output_columns else self.output_columns
if self.input_columns and self.output_columns \
and len(self.input_columns) != len(self.output_columns) \
and not self.column_order:
raise ValueError("When length of input_columns and output_columns are not equal,"
" column_order must be specified.")
self.python_multiprocessing = python_multiprocessing
self.process_pool = None
@ -3410,7 +3360,7 @@ class MapDataset(UnionBaseDataset):
self.prepare_multiprocessing()
callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns, self.column_order,
return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
def __deepcopy__(self, memodict):

View File

@ -1368,6 +1368,20 @@ def check_map(method):
[operations, input_columns, output_columns, column_order, num_parallel_workers, param_dict], _ = \
parse_user_args(method, *args, **kwargs)
if column_order is not None:
raise ValueError("The parameter 'column_order' had been deleted in map operation. "
"Please use '.project' operation instead.\n"
">> # Usage of old api:\n"
">> dataset = dataset.map(operations=PyFunc,\n"
">> input_columns=[\"column_a\"],\n"
">> output_columns=[\"column_b\", \"column_c\"],\n"
">> column_order=[\"column_b\", \"column_c\"])\n"
">> # Usage of new api:\n"
">> dataset = dataset.map(operations=PyFunc,\n"
">> input_columns=[\"column_a\"],\n"
">> output_columns=[\"column_b\", \"column_c\"])\n"
">> dataset = dataset.project([\"column_b\", \"column_c\"])")
(python_multiprocessing, max_rowsize, cache, callbacks, offload) = get_map_kwargs_from_dict(param_dict)
# check whether network computing operator exist in input operations(python function)
@ -1394,10 +1408,8 @@ def check_map(method):
raise ValueError("Input operations should not contain network computing operator like in "
"mindspore.nn or mindspore.ops, got operation: ", str(item))
nreq_param_columns = ['input_columns', 'output_columns', 'column_order']
nreq_param_columns = ['input_columns', 'output_columns']
if column_order is not None:
type_check(column_order, (list,), "column_order")
if num_parallel_workers is not None:
check_num_parallel_workers(num_parallel_workers)
type_check(python_multiprocessing, (bool,), "python_multiprocessing")
@ -1413,7 +1425,7 @@ def check_map(method):
else:
type_check(callbacks, (DSCallback,), "callbacks")
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, column_order]):
for param_name, param in zip(nreq_param_columns, [input_columns, output_columns]):
if param is not None:
check_columns(param, param_name)
if callbacks is not None:
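To illustrate the behavior the updated check enforces, here is a hedged sketch reusing the columns from the map docstring example earlier in this commit: a map whose output column count differs from the input no longer needs (or accepts) `column_order`, and any selection or reordering is done with `project()`.

import mindspore.dataset as ds

operations = [(lambda x, y: (x, x + y, x + y + 1)),
              (lambda x, y, z: x * y * z),
              (lambda x: (x % 2, x % 3, x % 5, x % 7))]
dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])

# Two input columns, four output columns: passing column_order here would
# now raise the ValueError shown above, so it is simply omitted.
dataset = dataset.map(operations, input_columns=["x", "y"],
                      output_columns=["mod2", "mod3", "mod5", "mod7"])

# Keep only some of the columns, in the desired order.
dataset = dataset.project(["mod7", "mod3", "mod2"])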

View File

@ -131,8 +131,7 @@ class JiebaTokenizer(TextTensorOperation):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=True)
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"],
... column_order=["token", "offsets_start", "offsets_limit"])
... output_columns=["token", "offsets_start", "offsets_limit"])
"""
@check_jieba_init
@ -629,8 +628,7 @@ class UnicodeCharTokenizer(TextTensorOperation):
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"],
... column_order=["token", "offsets_start", "offsets_limit"])
... output_columns=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets
@ -679,8 +677,7 @@ class WordpieceTokenizer(TextTensorOperation):
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
... max_bytes_per_token=100, with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"],
... column_order=["token", "offsets_start", "offsets_limit"])
... output_columns=["token", "offsets_start", "offsets_limit"])
"""
@check_wordpiece_tokenizer
@ -766,9 +763,7 @@ if platform.system().lower() != 'windows':
... with_offsets=True)
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start",
... "offsets_limit"],
... column_order=["token", "offsets_start",
... "offsets_limit"])
... "offsets_limit"])
"""
@check_basic_tokenizer
@ -862,9 +857,7 @@ if platform.system().lower() != 'windows':
... with_offsets=True)
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start",
... "offsets_limit"],
... column_order=["token", "offsets_start",
... "offsets_limit"])
... "offsets_limit"])
"""
@check_bert_tokenizer
@ -1054,9 +1047,7 @@ if platform.system().lower() != 'windows':
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=True)
>>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start",
... "offsets_limit"],
... column_order=["token", "offsets_start",
... "offsets_limit"])
... "offsets_limit"])
"""
@check_regex_tokenizer
@ -1097,8 +1088,7 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"],
... column_order=["token", "offsets_start", "offsets_limit"])
... output_columns=["token", "offsets_start", "offsets_limit"])
"""
@ -1139,8 +1129,7 @@ if platform.system().lower() != 'windows':
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"],
... column_order=["token", "offsets_start", "offsets_limit"])
... output_columns=["token", "offsets_start", "offsets_limit"])
"""
@check_with_offsets

View File

@ -430,8 +430,7 @@ class Duplicate(TensorOperation):
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["x"])
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.Duplicate(),
... input_columns=["x"],
... output_columns=["x", "y"],
... column_order=["x", "y"])
... output_columns=["x", "y"])
>>> # Data after
>>> # | x | y |
>>> # +---------+---------+
@ -477,8 +476,7 @@ class Unique(TensorOperation):
>>> dataset = ds.NumpySlicesDataset(data, ["x"])
>>> dataset = dataset.map(operations=c_transforms.Unique(),
... input_columns=["x"],
... output_columns=["x", "y", "z"],
... column_order=["x", "y", "z"])
... output_columns=["x", "y", "z"])
>>> # Data after
>>> # | x | y |z |
>>> # +---------+-----------------+---------+

View File

@ -402,8 +402,7 @@ class Duplicate(TensorOperation):
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["x"])
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms.Duplicate(),
... input_columns=["x"],
... output_columns=["x", "y"],
... column_order=["x", "y"])
... output_columns=["x", "y"])
>>> # Data after
>>> # | x | y |
>>> # +---------+---------+
@ -957,8 +956,7 @@ class Unique(TensorOperation):
>>> dataset = ds.NumpySlicesDataset(data, ["x"])
>>> dataset = dataset.map(operations=transforms.Unique(),
... input_columns=["x"],
... output_columns=["x", "y", "z"],
... column_order=["x", "y", "z"])
... output_columns=["x", "y", "z"])
>>> # Data after
>>> # | x | y |z |
>>> # +---------+-----------------+---------+

View File

@ -312,8 +312,7 @@ class BoundingBoxAugment(ImageTensorOperation):
>>> # map to apply ops
>>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
... input_columns=["image", "bbox"],
... output_columns=["image", "bbox"],
... column_order=["image", "bbox"])
... output_columns=["image", "bbox"])
"""
@deprecated_c_vision()
@ -2455,7 +2454,7 @@ class SlicePatches(ImageTensorOperation):
>>> cols = ['img' + str(x) for x in range(num_h*num_w)]
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
... input_columns=["image"],
... output_columns=cols, column_order=cols)
... output_columns=cols)
"""
@deprecated_c_vision()

View File

@ -581,8 +581,7 @@ class BoundingBoxAugment(ImageTensorOperation):
>>> # map to apply ops
>>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
... input_columns=["image", "bbox"],
... output_columns=["image", "bbox"],
... column_order=["image", "bbox"])
... output_columns=["image", "bbox"])
"""
@check_bounding_box_augment_cpp
@ -3834,7 +3833,7 @@ class SlicePatches(ImageTensorOperation):
>>> cols = ['img' + str(x) for x in range(num_h*num_w)]
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
... input_columns=["image"],
... output_columns=cols, column_order=cols)
... output_columns=cols)
"""
@check_slice_patches

View File

@ -1436,7 +1436,6 @@ def _save_dataset_to_mindir(model, dataset):
model.preprocessor.op.add()
model.preprocessor.op[-1].input_columns = json.dumps(op['input_columns'])
model.preprocessor.op[-1].output_columns = json.dumps(op['output_columns'])
model.preprocessor.op[-1].project_columns = json.dumps(op['project_columns'])
model.preprocessor.op[-1].op_type = json.dumps(op['op_type'])
model.preprocessor.op[-1].operations = json.dumps(op['operations'])
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False

View File

@ -212,7 +212,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8)
data_set = data_set.repeat(epochs)
return data_set
@ -260,7 +259,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8)
data_set = data_set.repeat(epochs)
return data_set

View File

@ -60,7 +60,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000,
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8)
num_parallel_workers=8)
# if train_mode:
data_set = data_set.repeat(epochs)
return data_set
@ -101,7 +101,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8)
data_set = data_set.repeat(epochs)
return data_set

View File

@ -304,14 +304,13 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
hwc_to_chw = C.HWC2CHW()
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
num_parallel_workers=num_parallel_workers)
ds = ds.project(["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"])
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
num_parallel_workers=num_parallel_workers)
return ds

View File

@ -181,7 +181,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)

View File

@ -71,7 +71,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
EXPECT_NE(decode_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
ds = ds->Map({decode_op}, {}, {}, some_cache);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -236,7 +236,8 @@ TEST_F(MindDataTestPipeline, TestAlbumIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -272,7 +273,8 @@ TEST_F(MindDataTestPipeline, TestAlbumIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -138,7 +138,8 @@ TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -174,7 +175,8 @@ TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -94,8 +94,7 @@ TEST_F(MindDataTestPipeline, TestCMUArcticBasicWithPipeline) {
auto op = transforms::PadEnd({1, 50000});
std::vector<std::string> input_columns = {"waveform"};
std::vector<std::string> output_columns = {"waveform"};
std::vector<std::string> project_columns = {"transcript", "utterance_id", "waveform"};
ds = ds->Map({op}, input_columns, output_columns, project_columns);
ds = ds->Map({op}, input_columns, output_columns);
EXPECT_NE(ds, nullptr);
ds = ds->Repeat(10);
EXPECT_NE(ds, nullptr);

View File

@ -138,7 +138,8 @@ TEST_F(MindDataTestPipeline, TestDBpediaDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "class" column and drop others
std::vector<std::string> columns = {"class"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -175,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestDBpediaDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -145,7 +145,8 @@ TEST_F(MindDataTestPipeline, TestDIV2KIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"hr_image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -182,7 +183,8 @@ TEST_F(MindDataTestPipeline, TestDIV2KIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -385,4 +387,4 @@ TEST_F(MindDataTestPipeline, TestDIV2KWithNullSamplerError) {
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid DIV2K input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
}

View File

@ -237,7 +237,8 @@ TEST_F(MindDataTestPipeline, TestEMnistIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -273,7 +274,8 @@ TEST_F(MindDataTestPipeline, TestEMnistIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -136,7 +136,8 @@ TEST_F(MindDataTestPipeline, TestFakeImageIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -171,7 +172,8 @@ TEST_F(MindDataTestPipeline, TestFakeImageIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -140,7 +140,8 @@ TEST_F(MindDataTestPipeline, TestFashionMnistIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -232,7 +233,8 @@ TEST_F(MindDataTestPipeline, TestFashionMnistIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -136,7 +136,8 @@ TEST_F(MindDataTestPipeline, TestFlickrIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -172,7 +173,8 @@ TEST_F(MindDataTestPipeline, TestFlickrIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -386,4 +388,4 @@ TEST_F(MindDataTestPipeline, TestFlickrWithNullSamplerError) {
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid Flickr30k input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
}

View File

@ -90,8 +90,7 @@ TEST_F(MindDataTestPipeline, TestGTZANBasicWithPipeline) {
auto op = transforms::PadEnd({1, 50000});
std::vector<std::string> input_columns = {"waveform"};
std::vector<std::string> output_columns = {"waveform"};
std::vector<std::string> project_columns = {"label", "waveform", "sample_rate"};
ds = ds->Map({op}, input_columns, output_columns, project_columns);
ds = ds->Map({op}, input_columns, output_columns);
EXPECT_NE(ds, nullptr);
ds = ds->Repeat(10);
EXPECT_NE(ds, nullptr);

View File

@ -209,7 +209,8 @@ TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "text" column and drop others
std::vector<std::string> columns = {"text"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -245,7 +246,8 @@ TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -317,4 +319,4 @@ TEST_F(MindDataTestPipeline, TestIMDBWithNullSamplerError) {
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid IMDB input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
}

View File

@ -80,7 +80,8 @@ TEST_F(MindDataTestPipeline, TestIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -121,7 +122,8 @@ TEST_F(MindDataTestPipeline, TestIteratorReOrder) {
// Create an iterator over the result of the above dataset
// Reorder "image" and "label" column
std::vector<std::string> columns = {"label", "image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -165,7 +167,8 @@ TEST_F(MindDataTestPipeline, TestIteratorTwoColumns) {
// Create an iterator over the result of the above dataset
// Only select "image" and "bbox" column
std::vector<std::string> columns = {"image", "bbox"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -204,7 +207,8 @@ TEST_F(MindDataTestPipeline, TestIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -220,7 +224,7 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpoch) {
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
std::shared_ptr<Dataset> ds = RandomData(random_data_num_row, schema)->SetNumWorkers(1);
std::shared_ptr<Iterator> iter = ds->CreateIterator({}, num_epochs);
std::shared_ptr<Iterator> iter = ds->CreateIterator(num_epochs);
ASSERT_NE(iter, nullptr); // should terminate test case if iterator is null
std::unordered_map<std::string, mindspore::MSTensor> row;
@ -253,6 +257,6 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpochFail) {
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
std::shared_ptr<Dataset> ds = RandomData(3, schema)->SetNumWorkers(1);
// expect nullptr due to incorrect num_epochs value.
EXPECT_EQ(ds->CreateIterator({}, 0), nullptr);
EXPECT_EQ(ds->CreateIterator({}, -2), nullptr);
EXPECT_EQ(ds->CreateIterator(0), nullptr);
EXPECT_EQ(ds->CreateIterator(-2), nullptr);
}
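With the column argument gone, the epoch count is the only parameter CreateIterator takes. A short sketch of the updated call, assuming the same RandomData pipeline built above:

int32_t num_epochs = 2;
// Was ds->CreateIterator({}, num_epochs); the empty column placeholder is no longer needed.
std::shared_ptr<Iterator> iter = ds->CreateIterator(num_epochs);
ASSERT_NE(iter, nullptr);
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
iter->Stop();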

View File

@ -140,7 +140,8 @@ TEST_F(MindDataTestPipeline, TestKMnistDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -176,7 +177,8 @@ TEST_F(MindDataTestPipeline, TestKMnistDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}
@ -248,7 +250,8 @@ TEST_F(MindDataTestPipeline, TestKMnistIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -72,9 +72,7 @@ TEST_F(MindDataTestPipeline, TestLibriTTSBasicWithPipeline) {
auto op = transforms::PadEnd({1, 500000});
std::vector<std::string> input_columns = {"waveform"};
std::vector<std::string> output_columns = {"waveform"};
std::vector<std::string> project_columns = {"sample_rate", "original_text", "normalized_text", "speaker_id",
"chapter_id", "utterance_id", "waveform"};
ds = ds->Map({op}, input_columns, output_columns, project_columns);
ds = ds->Map({op}, input_columns, output_columns);
EXPECT_NE(ds, nullptr);
ds = ds->Repeat(5);
EXPECT_NE(ds, nullptr);
@ -308,4 +306,4 @@ TEST_F(MindDataTestPipeline, TestLibriTTSSequentialSamplers) {
EXPECT_EQ(i, 2);
iter->Stop();
}
}

View File

@ -141,7 +141,8 @@ TEST_F(MindDataTestPipeline, TestLJSpeechDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "waveform" column and drop others
std::vector<std::string> columns = {"waveform"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -175,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestLJSpeechDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -130,7 +130,8 @@ TEST_F(MindDataTestPipeline, TestManifestIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -164,7 +165,8 @@ TEST_F(MindDataTestPipeline, TestManifestIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -1087,7 +1087,7 @@ TEST_F(MindDataTestPipeline, TestProjectMap) {
EXPECT_NE(random_vertical_flip_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
ds = ds->Map({random_vertical_flip_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create a Project operation on ds
@ -1139,7 +1139,7 @@ TEST_F(MindDataTestPipeline, TestProjectDuplicateColumnFail) {
EXPECT_NE(random_vertical_flip_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
ds = ds->Map({random_vertical_flip_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create a Project operation on ds
@ -1171,7 +1171,7 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
EXPECT_NE(random_vertical_flip_op, nullptr);
// Create a Map operation on ds
auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {}, {});
auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
@ -1180,7 +1180,7 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
EXPECT_EQ(iter1, nullptr);
// Create a Map operation on ds
auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"}, {});
auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"});
EXPECT_NE(ds2, nullptr);
// Create an iterator over the result of the above dataset
@ -1189,13 +1189,8 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
EXPECT_EQ(iter2, nullptr);
// Create a Map operation on ds
auto ds3 = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "image"});
auto ds3 = ds->Map({random_vertical_flip_op}, {}, {});
EXPECT_NE(ds3, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
// Expect failure: duplicate Map op project column name
EXPECT_EQ(iter3, nullptr);
}
/// Feature: Map op
@ -1211,7 +1206,7 @@ TEST_F(MindDataTestPipeline, TestMapNullOperation) {
// Create a Map operation on ds
std::shared_ptr<TensorTransform> operation = nullptr;
auto ds1 = ds->Map({operation}, {"image"}, {}, {});
auto ds1 = ds->Map({operation}, {"image"}, {});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
@ -1241,13 +1236,13 @@ TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {
EXPECT_NE(resize_op, nullptr);
// Create a Map operation on ds
// {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
ds = ds->Map({resize_op}, {}, {}, {"image"});
ds = ds->Map({resize_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"image"});
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
@ -2470,13 +2465,13 @@ TEST_F(MindDataTestPipeline, TestTFRecordDecodeRepeatResize) {
EXPECT_NE(resize_op, nullptr);
// Create a Map operation on ds
// {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
ds = ds->Map({decode_op, resize_op}, {}, {}, {"image"});
ds = ds->Map({decode_op, resize_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"image"});
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
@ -2756,9 +2751,11 @@ TEST_F(MindDataTestPipeline, Test1to3) {
EXPECT_NE(one_to_three_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"}, {"X", "Y", "Z", "label", "A", "B"});
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"});
EXPECT_NE(ds, nullptr);
ds = ds->Project({"X", "Y", "Z", "label", "A", "B"});
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
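The Map hunks above follow the companion change: the fourth project-columns argument is dropped, and any column selection or reordering becomes an explicit Project step. A condensed before/after sketch using the operation and column names from the 1-to-3 test above (treat them as assumptions):

// Before: ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"}, {"X", "Y", "Z", "label", "A", "B"});
// After: Map keeps only operations, input columns, and output columns ...
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"});
// ... and the projection rides on its own node.
ds = ds->Project({"X", "Y", "Z", "label", "A", "B"});
std::shared_ptr<Iterator> iter = ds->CreateIterator();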

View File

@ -176,7 +176,8 @@ TEST_F(MindDataTestPipeline, TestPennTreebankDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "text" column and drop others
std::vector<std::string> columns = {"text"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -216,7 +217,8 @@ TEST_F(MindDataTestPipeline, TestPennTreebankDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -174,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestPhotoTourIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -210,7 +211,8 @@ TEST_F(MindDataTestPipeline, TestPhotoTourIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -218,7 +218,8 @@ TEST_F(MindDataTestPipeline, TestPlaces365IteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -255,7 +256,8 @@ TEST_F(MindDataTestPipeline, TestPlaces365IteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -288,7 +288,8 @@ TEST_F(MindDataTestPipeline, TestQMnistIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -324,7 +325,8 @@ TEST_F(MindDataTestPipeline, TestQMnistIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -139,7 +139,8 @@ TEST_F(MindDataTestPipeline, TestSBUIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -173,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestSBUIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -137,7 +137,8 @@ TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -173,7 +174,8 @@ TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -143,7 +143,8 @@ TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "waveform" column and drop others
std::vector<std::string> columns = {"waveform"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -179,7 +180,8 @@ TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -287,7 +287,8 @@ TEST_F(MindDataTestPipeline, TestSTL10DatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -323,7 +324,8 @@ TEST_F(MindDataTestPipeline, TestSTL10DatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -212,7 +212,8 @@ TEST_F(MindDataTestPipeline, TestTedliumDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "waveform" column and drop others
std::vector<std::string> columns = {"waveform"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -246,7 +247,8 @@ TEST_F(MindDataTestPipeline, TestTedliumDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -51,7 +51,7 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic) {
EXPECT_NE(random_horizontal_flip_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({decode_op, random_horizontal_flip_op}, {}, {}, {"image"});
ds = ds->Map({decode_op, random_horizontal_flip_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds
@ -109,7 +109,7 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasicGetters) {
EXPECT_NE(random_horizontal_flip_op, nullptr);
// Create a Map operation on ds
ds = ds->Map({random_horizontal_flip_op}, {}, {}, {"image"});
ds = ds->Map({random_horizontal_flip_op}, {}, {});
EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds
@ -624,4 +624,4 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic7Row) {
// Manually terminate the pipeline
iter->Stop();
}
}

View File

@ -212,7 +212,8 @@ TEST_F(MindDataTestPipeline, TestUSPSIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "image" column and drop others
std::vector<std::string> columns = {"image"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -248,7 +249,8 @@ TEST_F(MindDataTestPipeline, TestUSPSIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -172,7 +172,8 @@ TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "text" column and drop others
std::vector<std::string> columns = {"text"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -216,7 +217,8 @@ TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -195,7 +195,8 @@ TEST_F(MindDataTestPipeline, TestYelpReviewDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "text" column and drop others
std::vector<std::string> columns = {"text"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -229,7 +230,8 @@ TEST_F(MindDataTestPipeline, TestYelpReviewDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -134,7 +134,8 @@ TEST_F(MindDataTestPipeline, TestYesNoDatasetIteratorOneColumn) {
// Create an iterator over the result of the above dataset
// Only select "waveform" column and drop others
std::vector<std::string> columns = {"waveform"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
@ -170,7 +171,8 @@ TEST_F(MindDataTestPipeline, TestYesNoGetDatasetIteratorWrongColumn) {
// Pass wrong column name
std::vector<std::string> columns = {"digital"};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
std::shared_ptr<ProjectDataset> project_ds = ds->Project(columns);
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_EQ(iter, nullptr);
}

View File

@ -980,8 +980,7 @@ TEST_F(MindDataTestPipeline, TestJiebaTokenizerSuccess2) {
EXPECT_NE(jieba_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({jieba_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({jieba_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -2804,8 +2803,7 @@ TEST_F(MindDataTestPipeline, TestRegexTokenizerSuccess1) {
EXPECT_NE(regex_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({regex_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({regex_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -2939,8 +2937,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeCharTokenizerSuccess1) {
EXPECT_NE(unicodechar_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({unicodechar_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({unicodechar_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -3570,8 +3567,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeScriptTokenizerSuccess2) {
EXPECT_NE(unicodescript_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -3642,8 +3638,7 @@ TEST_F(MindDataTestPipeline, TestUnicodeScriptTokenizerSuccess3) {
EXPECT_NE(unicodescript_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({unicodescript_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -3766,8 +3761,7 @@ TEST_F(MindDataTestPipeline, TestWhitespaceTokenizerSuccess1) {
EXPECT_NE(white_tokenizer, nullptr);
// Create Map operation on ds
ds = ds->Map({white_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"},
{"token", "offsets_start", "offsets_limit"});
ds = ds->Map({white_tokenizer}, {"text"}, {"token", "offsets_start", "offsets_limit"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -43,7 +43,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess1Shr) {
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 1.0);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -86,7 +86,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess2Auto) {
auto bound_box_augment_op(new vision::BoundingBoxAugment({random_rotation_op}, 1.0));
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -132,7 +132,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess3Obj) {
vision::BoundingBoxAugment bound_box_augment_op = vision::BoundingBoxAugment({random_rotation_op}, 1.0);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -176,7 +176,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail1) {
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, -1.0);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -204,7 +204,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail2) {
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 2.0);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -229,7 +229,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail3) {
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(nullptr, 0.5);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -258,7 +258,7 @@ TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail4) {
auto bound_box_augment_op = std::make_shared<vision::BoundingBoxAugment>(random_rotation_op, 0.25);
// Create a Map operation on ds
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({bound_box_augment_op}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -252,7 +252,7 @@ TEST_F(MindDataTestPipeline, TestResizeWithBBoxSuccess) {
// Note: No need to check for output after calling API class constructor
// Create a Map operation on ds
ds = ds->Map({resize_with_bbox_op, resize_with_bbox_op1}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({resize_with_bbox_op, resize_with_bbox_op1}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -548,7 +548,7 @@ TEST_F(MindDataTestPipeline, TestRandomCropWithBboxSuccess) {
std::vector<int32_t>{128, 128});
// Create a Map operation on ds
ds = ds->Map({random_crop}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({random_crop}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -688,7 +688,7 @@ TEST_F(MindDataTestPipeline, TestRandomHorizontalFlipWithBBoxSuccess) {
std::make_shared<vision::RandomHorizontalFlipWithBBox>(0.5);
// Create a Map operation on ds
ds = ds->Map({random_horizontal_flip_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({random_horizontal_flip_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -1767,7 +1767,7 @@ TEST_F(MindDataTestPipeline, TestRandomVerticalFlipWithBBoxSuccess) {
std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlipWithBBox>(0.4);
// Create a Map operation on ds
ds = ds->Map({random_vertical_flip_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({random_vertical_flip_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -72,7 +72,7 @@ TEST_F(MindDataTestSlicePatches, TestSlicePatchesPipeline) {
auto slice_patches = std::make_shared<vision::SlicePatches>(2, 2);
// Create a Map operation on ds
ds = ds->Map({slice_patches}, {"image"}, {"img0", "img1", "img2", "img3"}, {"img0", "img1", "img2", "img3"});
ds = ds->Map({slice_patches}, {"image"}, {"img0", "img1", "img2", "img3"});
EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds
@ -82,7 +82,8 @@ TEST_F(MindDataTestSlicePatches, TestSlicePatchesPipeline) {
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"img0", "img1", "img2", "img3"});
std::shared_ptr<Iterator> iter = project_ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row

View File

@ -205,7 +205,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail1num_ops) {
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, center_crop_op}, 0);
// Create a Map operation on ds
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -234,7 +234,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail2num_ops) {
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, center_crop_op}, 3);
// Create a Map operation on ds
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -263,7 +263,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail3transforms) {
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op}, 1);
// Create a Map operation on ds
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -291,7 +291,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail4transforms) {
std::vector<std::shared_ptr<TensorTransform>>{random_crop_op, nullptr}, 2);
// Create a Map operation on ds
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
@ -316,7 +316,7 @@ TEST_F(MindDataTestPipeline, TestUniformAugmentFail5transforms) {
auto uniform_aug_op = std::make_shared<vision::UniformAugment>(list, 1);
// Create a Map operation on ds
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"});
ds = ds->Map({uniform_aug_op}, {"image", "bbox"}, {"image", "bbox"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset

View File

@ -323,14 +323,14 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) {
ASSERT_NE(ds, nullptr);
ds->SetNumWorkers(1);
// config mapOp
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {}, {},
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {},
nullptr, {tst_cb});
ds->SetNumWorkers(1);
ASSERT_NE(ds, nullptr);
ds = ds->Repeat(2);
ASSERT_NE(ds, nullptr);
int32_t num_epochs = 2;
auto itr = ds->CreateIterator({}, num_epochs);
auto itr = ds->CreateIterator(num_epochs);
for (int ep_num = 0; ep_num < num_epochs; ++ep_num) {
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(itr->GetNextRow(&row));
@ -365,7 +365,7 @@ TEST_F(MindDataTestCallback, TestCAPICallback) {
ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt32, {}));
std::shared_ptr<Dataset> ds = RandomData(44, schema);
ASSERT_NE(ds, nullptr);
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {}, {},
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)}, {"label"}, {},
nullptr, {cb1});
ASSERT_NE(ds, nullptr);
ds = ds->Repeat(2);
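The longer Map overload used with callbacks shifts by one position once the project columns disappear. A sketch with cache and callbacks in their new slots, assuming the DSCallback instance cb1 from the hunk above:

// Map(operations, input_columns, output_columns, cache, callbacks) after the removal.
ds = ds->Map({std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt64)},
             {"label"}, {}, nullptr, {cb1});
ASSERT_NE(ds, nullptr);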

View File

@ -123,12 +123,14 @@ TEST_F(MindDataTestTreeAdapter, TestProjectMapTreeAdapter) {
EXPECT_NE(one_hot, nullptr);
// Create a Map operation, this will automatically add a project after map
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
ds = ds->Map({one_hot}, {"label"}, {"label"});
EXPECT_NE(ds, nullptr);
std::shared_ptr<ProjectDataset> project_ds = ds->Project({"label"});
auto tree_adapter = std::make_shared<TreeAdapter>();
Status rc = tree_adapter->Compile(ds->IRNode(), 2);
Status rc = tree_adapter->Compile(project_ds->IRNode(), 2);
EXPECT_TRUE(rc.IsOk());

View File

@ -53,9 +53,11 @@ class MindDataTestProfiler : public UT::DatasetOpTesting {
EXPECT_NE(one_hot, nullptr);
// Create a Map operation, this will automatically add a project after map
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
ds = ds->Map({one_hot}, {"label"}, {"label"});
EXPECT_NE(ds, nullptr);
ds = ds->Project({"label"});
ds = ds->Take(op_input);
EXPECT_NE(ds, nullptr);
@ -98,7 +100,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManager1) {
EXPECT_NE(one_hot, nullptr);
// Create a Map operation, this will automatically add a project after map
ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
ds = ds->Map({one_hot}, {"label"}, {"label"});
EXPECT_NE(ds, nullptr);
ds = ds->Take(4);
@ -107,7 +109,6 @@ TEST_F(MindDataTestProfiler, TestProfilerManager1) {
ds = ds->Batch(2, true);
EXPECT_NE(ds, nullptr);
// No columns are specified, use all columns
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
@ -160,7 +161,6 @@ TEST_F(MindDataTestProfiler, TestProfilerManager2) {
ds = ds->Batch(2, false);
EXPECT_NE(ds, nullptr);
// No columns are specified, use all columns
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
@ -202,9 +202,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByEpoch) {
std::shared_ptr<Dataset> ds = set_dataset(20);
// No columns are specified, use all columns
std::vector<std::string> columns = {};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
std::shared_ptr<Iterator> iter = ds->CreateIterator(3);
EXPECT_NE(iter, nullptr);
std::vector<uint8_t> cpu_result;
@ -287,9 +285,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByStep) {
std::shared_ptr<Dataset> ds = set_dataset(20);
// No columns are specified, use all columns
std::vector<std::string> columns = {};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
std::shared_ptr<Iterator> iter = ds->CreateIterator(3);
EXPECT_NE(iter, nullptr);
std::vector<uint8_t> cpu_result;
@ -381,9 +377,7 @@ TEST_F(MindDataTestProfiler, TestProfilerManagerByTime) {
std::shared_ptr<Dataset> ds = set_dataset(20);
// No columns are specified, use all columns
std::vector<std::string> columns = {};
std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 5);
std::shared_ptr<Iterator> iter = ds->CreateIterator(5);
EXPECT_NE(iter, nullptr);
std::vector<uint8_t> cpu_result;

View File

@ -52,6 +52,7 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "bounding_box_augment_rotation_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
@ -84,6 +85,7 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "bounding_box_augment_crop_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
@ -116,6 +118,7 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "bounding_box_augment_valid_ratio_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
@ -170,6 +173,7 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
data_voc1 = helper_perform_ops_bbox_edgecase_float(data_voc1)
data_voc2 = helper_perform_ops_bbox_edgecase_float(data_voc2)
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "bounding_box_augment_valid_edge_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -124,8 +124,7 @@ def test_lambdas():
def test_config(arr, input_columns, output_cols, op_list):
data = ds.NumpySlicesDataset(
arr, column_names=input_columns, shuffle=False)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols,
column_order=output_cols)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols)
res = []
for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
for col_name in output_cols:
@ -159,8 +158,7 @@ def test_c_py_compose_transforms_module():
def test_config(arr, input_columns, output_cols, op_list):
data = ds.NumpySlicesDataset(
arr, column_names=input_columns, shuffle=False)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols,
column_order=output_cols)
data = data.map(operations=op_list, input_columns=input_columns, output_columns=output_cols)
res = []
for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
for col_name in output_cols:

View File

@ -94,8 +94,8 @@ def test_concatenate_op_multi_input_string():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], output_columns=["out1"])
data = data.project(["out1"])
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"])
for data_row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)
@ -114,8 +114,8 @@ def test_concatenate_op_multi_input_numeric():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], output_columns=["out1"])
data = data.project(["out1"])
expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

View File

@ -293,7 +293,8 @@ def test_generator_8():
data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
num_parallel_workers=2)
data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
num_parallel_workers=2, column_order=["out0", "out1", "out2"])
num_parallel_workers=2)
data1 = data1.project(["out0", "out1", "out2"])
data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
num_parallel_workers=2)
@ -325,8 +326,6 @@ def test_generator_9():
num_parallel_workers=4)
# Expected column order is not changed.
# data1 = data[0] is "image" and data[1] is "label"
# data2 = data[0] is "label" and data[1] is "image"
i = 0
for data1, data2 in zip(data1, data2): # each data is a dictionary
golden = np.array([i])
@ -352,7 +351,8 @@ def test_generator_10():
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
num_parallel_workers=2)
data1 = data1.project(['col0', 'out1', 'out2'])
# Expected column order is |col0|out1|out2|
i = 0
@ -369,21 +369,21 @@ def test_generator_10():
def test_generator_11():
"""
Feature: GeneratorDataset
Description: Test map column order len(input_columns) != len(output_columns), column_order drops some columns
Description: Test that .project drops some columns
Expectation: The dataset is processed as expected
"""
logger.info("Test map column order when len(input_columns) != len(output_columns), "
"and column_order drops some columns.")
logger.info("Test .project drops some columns.")
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
column_order=['out1', 'out2'], num_parallel_workers=2)
num_parallel_workers=2)
data1 = data1.project(["out1", "out2"])
# Expected column order is |out1|out2|
i = 0
for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item[0], golden)
@ -415,7 +415,8 @@ def test_generator_12():
i = i + 1
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x * 5)), column_order=["col1", "col0"], num_parallel_workers=2)
data1 = data1.map(operations=(lambda x: (x * 5)), num_parallel_workers=2)
data1 = data1.project(["col1", "col0"])
# Expected column order is |col0|col1|
i = 0
@ -451,7 +452,7 @@ def test_generator_13():
i = i + 1
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([i * 5])
np.testing.assert_array_equal(item["out0"], golden)
@ -587,7 +588,7 @@ def test_generator_18():
i = i + 1
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# len should be 2 because col0 is dropped (not included in column_order)
# len should be 2 because col0 is dropped
assert len(item) == 2
golden = np.array([i * 5])
np.testing.assert_array_equal(item["out0"], golden)
@ -601,7 +602,7 @@ def test_generator_19():
Description: Test multiprocessing 2 different large columns
Expectation: The dataset is processed as expected
"""
logger.info("Test map column order when input_columns is None.")
logger.info("Test map multiprocessing 2 different large columns.")
# apply dataset operations
data1 = ds.GeneratorDataset(DatasetGeneratorLarge(), ["col0", "col1"], python_multiprocessing=True, shuffle=False)
@ -713,24 +714,6 @@ def test_generator_error_2():
assert "Data type of 1th item of the input or its converted Numpy array is expected" in str(info.value)
def test_generator_error_3():
"""
Feature: GeneratorDataset
Description: Test GeneratorDataset when len(input_columns) != len(output_columns) and column_order is not specified
Expectation: Error is raised as expected
"""
with pytest.raises(ValueError) as info:
# apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
num_parallel_workers=2)
for _ in data1:
pass
assert "When length of input_columns and output_columns are not equal, column_order must be specified." in \
str(info.value)
def test_generator_error_4():
"""
Feature: GeneratorDataset

View File

@ -882,7 +882,7 @@ def test_imagefolder_exception():
data = ds.ImageFolderDataset(DATA_DIR)
data = data.map(operations=exception_func2, input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"], num_parallel_workers=1)
num_parallel_workers=1)
for _ in data.__iter__():
pass
assert False

View File

@ -692,7 +692,7 @@ def test_imdb_exception():
data = ds.IMDBDataset(DATA_DIR)
data = data.map(operations=exception_func2, input_columns=["text", "label"],
output_columns=["text", "label", "label1"],
column_order=["text", "label", "label1"], num_parallel_workers=1)
num_parallel_workers=1)
for _ in data.__iter__():
pass
assert False

View File

@ -568,7 +568,6 @@ def test_lsun_exception_map():
data = data.map(operations=exception_func2,
input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"],
num_parallel_workers=1)
for _ in data.__iter__():
pass

View File

@ -448,7 +448,6 @@ def test_omniglot_exception():
data = data.map(operations=exception_func2,
input_columns=["image", "label"],
output_columns=["image", "label", "label1"],
column_order=["image", "label", "label1"],
num_parallel_workers=1)
for _ in data.__iter__():
pass

View File

@ -135,8 +135,8 @@ def test_voc_meta_column():
return img, img, label
data3 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"],
column_order=["_meta-filename", "img1", "img2", "label"])
data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"])
data3 = data3.project(["_meta-filename", "img1", "img2", "label"])
data3 = data3.rename("_meta-filename", "filename")
num = 0
for item in data3.create_tuple_iterator(num_epochs=1, output_numpy=True):
@ -148,8 +148,8 @@ def test_voc_meta_column():
return img
data4 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"],
column_order=["_meta-filename", "img1"])
data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"])
data4 = data4.project(["_meta-filename", "img1"])
data4 = data4.rename("_meta-filename", "filename")
num = 0
for item in data4.create_tuple_iterator(num_epochs=1, output_numpy=True):

View File

@ -24,8 +24,7 @@ import mindspore.dataset.transforms as ops
def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array)
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
column_order=["x", "y"])
data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"])
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(array, d["x"])
np.testing.assert_array_equal(array, d["y"])

View File

@ -159,8 +159,8 @@ def test_get_column_name_map():
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"])
assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"],
column_order=["col2", "col1"])
data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"])
data = data.project(["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"]

View File

@ -35,7 +35,7 @@ def test_magphase_pipeline():
dataset = ds.NumpySlicesDataset(data1, column_names=["col1"], shuffle=False)
magphase_window = audio.Magphase(power=1.0)
dataset = dataset.map(operations=magphase_window, input_columns=["col1"],
output_columns=["mag", "phase"], column_order=["mag", "phase"])
output_columns=["mag", "phase"])
for data1, data2 in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
assert abs(data1[0] - expected[0]) < 0.00001
assert abs(data1[1] - expected[1]) < 0.00001

View File

@ -342,6 +342,28 @@ def test_python_map_mp_seed_repeatability(set_seed_to=1337, set_num_parallel_wor
ds.config.set_enable_shared_mem(original_enable_shared_mem)
def test_map_with_deprecated_parameter():
"""
Feature: Map op
Description: Test map with the deprecated parameter 'column_order'
Expectation: ValueError is raised as expected
"""
data1 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data2 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data3 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
data4 = np.array(np.random.sample(size=(300, 300, 3)) * 255, dtype=np.uint8)
label = [1, 2, 3, 4]
dataset = ds.NumpySlicesDataset(([data1, data2, data3, data4], label), ["data", "label"])
with pytest.raises(ValueError) as info:
dataset = dataset.map(operations=[(lambda x: (x + 1, x / 255))],
input_columns=["data"],
output_columns=["data2", "data3"],
column_order=["data2", "data3"])
assert "The parameter 'column_order' had been deleted in map operation." in str(info.value)
if __name__ == '__main__':
test_map_c_transform_exception()
test_map_py_transform_exception()
@ -351,3 +373,4 @@ if __name__ == '__main__':
test_c_map_randomness_repeatability_with_shards()
test_python_map_mp_repeatability(num_parallel_workers=4, num_samples=4)
test_python_map_mp_seed_repeatability()
test_map_with_deprecated_parameter()

View File

@ -133,8 +133,7 @@ def test_offload_multi_column():
dataset = ds.ImageFolderDataset(DATA_DIR)
dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"],
column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset = dataset.map(operations=[C.Decode()], input_columns="image1")
dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image1")
dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
@ -143,8 +142,7 @@ def test_offload_multi_column():
dataset_offload = ds.ImageFolderDataset(DATA_DIR)
dataset_offload = dataset_offload.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"],
column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image1")
dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image1", offload=True)
dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image2")
@ -171,7 +169,7 @@ def test_offload_column_mapping():
dataset = ds.ImageFolderDataset(DATA_DIR)
dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"], column_order=["image1", "image2", "label"])
output_columns=["image1", "image2", "label"])
dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)

View File

@ -51,7 +51,8 @@ def test_one_hot():
# First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
one_hot_op = data_trans.OneHot(num_classes=depth)
data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"])
data1 = data1.map(operations=one_hot_op, input_columns=["label"])
data1 = data1.project(["label"])
# Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)

View File

@ -33,8 +33,8 @@ def test_map_reorder0():
# Generator -> Map
data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
column_order=["col1", "out"])
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out")
data0 = data0.project(["col1", "out"])
for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
assert item == [np.array(1), np.array(0)]
@ -55,11 +55,14 @@ def test_map_reorder1():
# Three map and zip
data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
data0 = data0.map(operations=(lambda x: x), input_columns="a0")
data0 = data0.project(["a2", "a1", "a0"])
data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
data1 = data1.map(operations=(lambda x: x), input_columns="b0")
data1 = data1.project(["b1", "b2", "b0"])
data2 = ds.zip((data0, data1))
data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])
data2 = data2.map(operations=(lambda x: x), input_columns="a0")
data2 = data2.project(["b2", "a2", "b1", "a1", "b0", "a0"])
for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True):
assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]

View File

@ -204,7 +204,7 @@ def test_case_map_project_map_project():
save_and_check_tuple(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
def test_column_order():
def test_project_operation():
"""
Feature: Project op
Description: Test Project op where the output dict should maintain the insertion order
@ -228,4 +228,4 @@ def test_column_order():
if __name__ == '__main__':
test_column_order()
test_project_operation()

View File

@ -58,8 +58,7 @@ def test_case_1():
# apply dataset operations
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"],
column_order=["out0", "out1"])
data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"])
i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
@ -84,8 +83,7 @@ def test_case_2():
# apply dataset operations
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out",
column_order=["out"])
data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out")
i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
@ -109,7 +107,7 @@ def test_case_3():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"])
i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
@ -137,8 +135,7 @@ def test_case_4():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"], num_parallel_workers=4)
i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
@ -244,7 +241,6 @@ def test_case_8():
data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
column_order=["out0", "out1", "out2"],
python_multiprocessing=True)
i = 0
@ -333,7 +329,7 @@ def test_pyfunc_implicit_compose():
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.map(operations=[(lambda x, y: (x, x + y, x + y + 1)), (lambda x, y, z: (x, y, z))], input_columns=col,
output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
output_columns=["out0", "out1", "out2"])
i = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary

View File

@ -598,7 +598,7 @@ def test_random_crop_09_c():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
decode_op = vision.Decode()

View File

@ -466,7 +466,7 @@ def test_random_crop_and_resize_07():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_crop_and_resize_op = vision.RandomResizedCrop((256, 512), (2, 2), (1, 3))
decode_op = vision.Decode()

View File

@ -53,6 +53,7 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "random_resized_crop_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -98,6 +98,7 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "random_crop_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -252,7 +252,7 @@ def test_random_horizontal_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=[
"image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_horizontal_op = vision.RandomHorizontalFlip(1.0)
decode_op = vision.Decode()

View File

@ -95,6 +95,7 @@ def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "random_horizontal_flip_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -92,7 +92,7 @@ def test_random_resize_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
resize_op = vision.RandomResize(10)
decode_op = vision.Decode()

View File

@ -49,6 +49,7 @@ def test_random_resize_with_bbox_op_voc_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "random_resize_with_bbox_op_01_c_voc_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
@ -82,6 +83,7 @@ def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False):
# map to apply ops
data_coco2 = helper_perform_ops_bbox(data_coco2, test_op)
data_coco2 = data_coco2.project(["image", "bbox"])
filename = "random_resize_with_bbox_op_01_c_coco_result.npz"
save_and_check_md5(data_coco2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -232,7 +232,7 @@ def test_random_vertical_op_1():
data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(operations=ops.Duplicate(), input_columns=["image"],
output_columns=["image", "image_copy"], column_order=["image", "image_copy"])
output_columns=["image", "image_copy"])
random_vertical_op = vision.RandomVerticalFlip(1.0)
decode_op = vision.Decode()

View File

@ -95,6 +95,7 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "random_vertical_flip_with_bbox_01_c_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -48,6 +48,7 @@ def test_resize_with_bbox_op_voc_c(plot_vis=False):
# map to apply ops
data_voc2 = helper_perform_ops_bbox(data_voc2, test_op)
data_voc2 = data_voc2.project(["image", "bbox"])
filename = "resize_with_bbox_op_01_c_voc_result.npz"
save_and_check_md5(data_voc2, filename, generate_golden=GENERATE_GOLDEN)
@ -75,6 +76,7 @@ def test_resize_with_bbox_op_coco_c(plot_vis=False):
# map to apply ops
data_coco2 = helper_perform_ops_bbox(data_coco2, test_op)
data_coco2 = data_coco2.project(["image", "bbox"])
filename = "resize_with_bbox_op_01_c_coco_result.npz"
save_and_check_md5(data_coco2, filename, generate_golden=GENERATE_GOLDEN)

View File

@ -93,7 +93,7 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal
dataset1 = dataset1.map(operations=decode_op, input_columns=["image"])
dataset1 = dataset1.map(operations=resize_op, input_columns=["image"])
dataset1 = dataset1.map(operations=slice_patches_op,
input_columns=["image"], output_columns=cols, column_order=cols)
input_columns=["image"], output_columns=cols)
# Second dataset
dataset2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
dataset2 = dataset2.map(operations=decode_op, input_columns=["image"])
@ -101,7 +101,7 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal
func_slice_patches = functools.partial(
slice_patches, num_h=num_h, num_w=num_w, pad_or_drop=pad_or_drop, fill_value=fill_value)
dataset2 = dataset2.map(operations=func_slice_patches,
input_columns=["image"], output_columns=cols, column_order=cols)
input_columns=["image"], output_columns=cols)
num_iter = 0
patches_c = []
@ -186,7 +186,6 @@ def test_slice_patches_08():
dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
slice_patches_op = vision.SlicePatches(2, 2)
dataset = dataset.map(input_columns=["image"], output_columns=["img0", "img1", "img2", "img3"],
column_order=["img0", "img1", "img2", "img3"],
operations=slice_patches_op)
for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
patch_shape = item['img0'].shape
@ -226,7 +225,7 @@ def skip_test_slice_patches_11():
slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP)
cols = ['img' + str(x) for x in range(10*13)]
dataset = dataset.map(input_columns=["image"], output_columns=cols,
column_order=cols, operations=slice_patches_op)
operations=slice_patches_op)
for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
patch_shape = item['img0'].shape
assert patch_shape == (700, 538, 256)

View File

@ -44,8 +44,7 @@ def test_spectral_centroid_pipeline():
wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
out = audio.SpectralCentroid(sample_rate=44100, n_fft=8)
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["SpectralCentroid"],
column_order=['SpectralCentroid'])
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["SpectralCentroid"])
result = np.array([[[4436.1182, 3580.0718, 2902.4917, 3334.8962, 5199.8350, 6284.4814,
3580.0718, 2895.5659]]])
for data1 in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):

View File

@ -45,8 +45,7 @@ def test_spectrogram_pipeline():
wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
out = audio.Spectrogram(n_fft=8)
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["Spectrogram"],
column_order=['Spectrogram'])
dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["Spectrogram"])
result = np.array([[[2.8015e+01, 1.2100e+02, 3.1354e+02, 1.6900e+02, 2.5000e+01,
1.0843e+01, 1.2100e+02, 3.3150e+02],
[3.2145e+00, 3.3914e+01, 9.4728e+01, 4.5914e+01, 9.9142e+00,

View File

@ -371,8 +371,7 @@ def test_process_string_pipeline():
data = np.array([["apple"], ["orange"], ["banana"], ["1"], ["2"], ["3"], ["a"], ["b"], ["c"]], dtype=dtype)
dataset = ds.NumpySlicesDataset(data, column_names=["text"])
assert dataset.output_types()[0].type == dtype
dataset = dataset.map(lambda e: (e, e), input_columns=["text"], output_columns=["text1", "text2"],
column_order=["text1", "text2"])
dataset = dataset.map(lambda e: (e, e), input_columns=["text"], output_columns=["text1", "text2"])
for i, item in enumerate(dataset.create_dict_iterator(num_epochs=1, output_numpy=True)):
item["text1"] = data[i]
item["text2"] = data[i]

View File

@ -106,8 +106,7 @@ def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_of
with_offsets=True)
dataset = dataset.map(operations=basic_tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']

View File

@ -214,8 +214,7 @@ def check_bert_tokenizer_with_offsets(first, last, expect_str,
unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace,
normalization_form=normalization_form, preserve_unused_token=preserve_unused_token, with_offsets=True)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']

View File

@ -282,7 +282,6 @@ def test_jieba_with_offsets_1():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]
@ -308,7 +307,6 @@ def test_jieba_with_offsets_1_1():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧']
expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45]
@ -333,7 +331,6 @@ def test_jieba_with_offsets_1_2():
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX, with_offsets=True)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]
@ -361,7 +358,6 @@ def test_jieba_with_offsets_2():
expect = ['男默女泪', '市', '长江大桥']
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expected_offsets_start = [0, 12, 15]
expected_offsets_limit = [12, 15, 27]
@ -387,7 +383,6 @@ def test_jieba_with_offsets_2_1():
jieba_op.add_word("男默女泪", 10)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expect = ['男默女泪', '市', '长江大桥']
expected_offsets_start = [0, 12, 15]
@ -414,7 +409,6 @@ def test_jieba_with_offsets_2_2():
jieba_op.add_word("江大桥", 20000)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=2)
expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式']
expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51]
@ -444,7 +438,6 @@ def test_jieba_with_offsets_3():
jieba_op.add_dict(user_dict)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['男默女泪', '市', '长江大桥']
expected_offsets_start = [0, 12, 15]
@ -475,7 +468,6 @@ def test_jieba_with_offsets_3_1():
jieba_op.add_dict(user_dict)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['男默女泪', '市长', '江大桥']
expected_offsets_start = [0, 12, 18]
@ -504,7 +496,6 @@ def test_jieba_with_offsets_4():
jieba_op.add_dict(dict_file)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']
expected_offsets_start = [0, 12, 21, 27, 33, 36, 42]
@ -532,7 +523,6 @@ def test_jieba_with_offsets_5():
jieba_op.add_word("江大桥", 20000)
data = data.map(operations=jieba_op, input_columns=["text"],
output_columns=["token", "offsets_start", "offsets_limit"],
column_order=["token", "offsets_start", "offsets_limit"],
num_parallel_workers=1)
expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式']
expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51]

View File

@ -64,8 +64,7 @@ def test_unicode_char_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeCharTokenizer(with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
[0, 3, 6, 9, 12, 15], [0, 3, 6, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1]]
@ -116,8 +115,7 @@ def test_whitespace_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.WhitespaceTokenizer(with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 8, 11], [0], [0], [0]]
expected_offsets_limit = [[7, 10, 19], [18], [17], [0]]
@ -189,8 +187,7 @@ def test_unicode_script_tokenizer_with_offsets():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False, with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]]
expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]]
@ -218,8 +215,7 @@ def test_unicode_script_tokenizer_with_offsets2():
dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
dataset = dataset.map(operations=tokenizer, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
tokens = []
expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]]
expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]]
@ -370,8 +366,7 @@ def test_regex_tokenizer_with_offsets():
dataset = dataset.take(last - first + 1)
tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
out_text = []
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):

View File

@ -127,8 +127,7 @@ def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_off
tokenizer_op = text.WordpieceTokenizer(vocab=vocab, with_offsets=True, unknown_token=unknown_token,
max_bytes_per_token=max_bytes_per_token)
dataset = dataset.map(operations=tokenizer_op, input_columns=['text'],
output_columns=['token', 'offsets_start', 'offsets_limit'],
column_order=['token', 'offsets_start', 'offsets_limit'])
output_columns=['token', 'offsets_start', 'offsets_limit'])
count = 0
for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
token = i['token']

View File

@ -24,8 +24,7 @@ import mindspore.dataset.transforms as ops
def compare(array, res, idx, cnt):
data = ds.NumpySlicesDataset([array], column_names="x")
data = data.batch(2)
data = data.map(operations=ops.Unique(), input_columns=["x"], output_columns=["x", "y", "z"],
column_order=["x", "y", "z"])
data = data.map(operations=ops.Unique(), input_columns=["x"], output_columns=["x", "y", "z"])
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(res, d["x"])
np.testing.assert_array_equal(idx, d["y"])

View File

@ -429,20 +429,16 @@ def helper_perform_ops_bbox(data, test_op=None, edge_case=False):
operations=[lambda img, bboxes: (
img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype)), test_op],
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
return data.map(
operations=[lambda img, bboxes: (
img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))],
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
if test_op:
return data.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=[
"image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
return data
@ -456,8 +452,7 @@ def helper_perform_ops_bbox_edgecase_float(data):
return data.map(operations=lambda img, bbox: (img, np.array(
[[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
def helper_test_visual_bbox(plot_vis, data1, data2):
@ -579,12 +574,10 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error):
# map to use selected invalid bounding box type
data = data.map(operations=lambda img, bboxes: add_bad_bbox(img, bboxes, invalid_bbox_type),
input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"])
output_columns=["image", "bbox"])
# map to apply ops
data = data.map(operations=[test_op], input_columns=["image", "bbox"],
output_columns=["image", "bbox"],
column_order=["image", "bbox"]) # Add column for "bbox"
output_columns=["image", "bbox"])
for _, _ in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
break
except RuntimeError as error:

Some files were not shown because too many files have changed in this diff.