update chinese doc
This commit is contained in:
parent
2076a8f081
commit
84bc135983
|
@ -25,44 +25,157 @@ mindspore.dataset.CLUEDataset
|
|||
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号,默认值:None。只有当指定了 `num_shards` 时才能指定此参数。
|
||||
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值:None,不使用缓存。
|
||||
|
||||
根据给定的 `task` 参数配置,数据集会生成不同的输出列:
|
||||
根据给定的 `task` 参数和 `usage` 配置,数据集会生成不同的输出列:
|
||||
|
||||
- `task` = 'AFQMC'
|
||||
- usage = 'train',输出列: `[sentence1, dtype=string]` , `[sentence2, dtype=string]` , `[label, dtype=string]` .
|
||||
- usage = 'test',输出列: `[id, dtype=uint8]` , `[sentence1, dtype=string]` , `[sentence2, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[sentence1, dtype=string]` , `[sentence2, dtype=string]` , `[label, dtype=string]` .
|
||||
|
||||
- `task` = 'TNEWS'
|
||||
- usage = 'train',输出列: `[label, dtype=string]` , `[label_des, dtype=string]` , `[sentence, dtype=string]` , `[keywords, dtype=string]` .
|
||||
- usage = 'test',输出列: `[label, dtype=string]` , `[label_des, dtype=string]` , `[sentence, dtype=string]` , `[keywords, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[label, dtype=string]` , `[label_des, dtype=string]` , `[sentence, dtype=string]` , `[keywords, dtype=string]` .
|
||||
|
||||
- `task` = 'IFLYTEK'
|
||||
- usage = 'train',输出列: `[label, dtype=string]` , `[label_des, dtype=string]` , `[sentence, dtype=string]` .
|
||||
- usage = 'test',输出列: `[id, dtype=string]` , `[sentence, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[label, dtype=string]` , `[label_des, dtype=string]` , `[sentence, dtype=string]` .
|
||||
|
||||
- `task` = 'CMNLI'
|
||||
- usage = 'train',输出列: `[sentence1, dtype=string]` , `[sentence2, dtype=string]` , `[label, dtype=string]` .
|
||||
- usage = 'test',输出列: `[id, dtype=uint8]` , `[sentence1, dtype=string]` , `[sentence2, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[sentence1, dtype=string]` , `[sentence2, dtype=string]` , `[label, dtype=string]` .
|
||||
|
||||
- `task` = 'WSC'
|
||||
- usage = 'train',输出列: `[span1_index, dtype=uint8]` , `[span2_index, dtype=uint8]` , `[span1_text, dtype=string]` , `[span2_text, dtype=string]` , `[idx, dtype=uint8]` , `[text, dtype=string]` , `[label, dtype=string]` .
|
||||
- usage = 'test',输出列: `[span1_index, dtype=uint8]` , `[span2_index, dtype=uint8]` , `[span1_text, dtype=string]` , `[span2_text, dtype=string]` , `[idx, dtype=uint8]` , `[text, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[span1_index, dtype=uint8]` , `[span2_index, dtype=uint8]` , `[span1_text, dtype=string]` , `[span2_text, dtype=string]` , `[idx, dtype=uint8]` , `[text, dtype=string]` , `[label, dtype=string]` .
|
||||
|
||||
- `task` = 'CSL'
|
||||
- usage = 'train',输出列: `[id, dtype=uint8]` , `[abst, dtype=string]` , `[keyword, dtype=string]` , `[label, dtype=string]` .
|
||||
- usage = 'test',输出列: `[id, dtype=uint8]` , `[abst, dtype=string]` , `[keyword, dtype=string]` .
|
||||
- usage = 'eval',输出列: `[id, dtype=uint8]` , `[abst, dtype=string]` , `[keyword, dtype=string]` , `[label, dtype=string]` .
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| `task` | `usage` | 输出列 |
|
||||
+=========================+==============================+=============================+
|
||||
| AFQMC | train | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| TNEWS | train | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [label, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| IFLYTEK | train | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| CMNLI | train | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| WSC | train | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| CSL | train | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **ValueError** - `task` 参数不为 'AFQMC'、'TNEWS'、'IFLYTEK'、'CMNLI'、'WSC' 或 'CSL'。
|
||||
- **ValueError** - `usage` 参数不为 'train'、'test' 或 'eval'。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
**关于CLUE数据集:**
|
||||
|
||||
|
|
|
@ -27,9 +27,11 @@
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `field_delim` 参数无效。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. include:: mindspore.dataset.Dataset.rst
|
||||
|
||||
|
|
|
@ -24,7 +24,8 @@ mindspore.dataset.CelebADataset
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含任何数据文件。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `usage` 参数取值不为'train'、'valid'、'test'或'all'。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
|
|
|
@ -22,13 +22,14 @@ mindspore.dataset.Cifar100Dataset
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError:** `dataset_dir` 路径下不包含数据文件。
|
||||
- **RuntimeError:** `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError:** 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError:** 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError:** `shard_id` 参数错误(小于0或者大于等于 `num_shards`)。
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含数据文件。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `usage` 参数取值不为'train'、'test'或'all'。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数错误(小于0或者大于等于 `num_shards`)。
|
||||
|
||||
.. note:: 此数据集可以指定参数 `sampler` ,但参数 `sampler` 和参数 `shuffle` 的行为是互斥的。下表展示了几种合法的输入参数组合及预期的行为。
|
||||
|
||||
|
|
|
@ -22,13 +22,14 @@ mindspore.dataset.Cifar10Dataset
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError:** `dataset_dir` 路径下不包含数据文件。
|
||||
- **RuntimeError:** `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError:** 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError:** 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError:** `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含数据文件。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `usage` 参数取值不为'train'、'test'或'all'。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. note:: 此数据集可以指定参数 `sampler` ,但参数 `sampler` 和参数 `shuffle` 的行为是互斥的。下表展示了几种合法的输入参数组合及预期的行为。
|
||||
|
||||
|
|
|
@ -12,30 +12,59 @@
|
|||
- **task** (str, 可选) - 指定COCO数据的任务类型。支持的任务类型包括:'Detection'、'Stuff' 、'Panoptic'和'Keypoint'。默认值:'Detection'。
|
||||
- **num_samples** (int, 可选) - 指定从数据集中读取的样本数,可以小于数据集总数。默认值:None,全部样本图片。
|
||||
- **num_parallel_workers** (int, 可选) - 指定读取数据的工作线程数,默认值:使用mindspore.dataset.config中配置的线程数。
|
||||
- **shuffle** (bool, 可选) - 是否混洗数据集。默认值:None,下表中会展示不同参数配置的预期行为。
|
||||
- **shuffle** (bool, 可选) - 是否混洗数据集。默认值:None,表2中会展示不同参数配置的预期行为。
|
||||
- **decode** (bool, 可选) - 是否对读取的图片进行解码操作,默认值:False,不解码。
|
||||
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器,默认值:None,下表中会展示不同配置的预期行为。
|
||||
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器,默认值:None,表2中会展示不同配置的预期行为。
|
||||
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数,默认值:None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
|
||||
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号,默认值:None。只有当指定了 `num_shards` 时才能指定此参数。
|
||||
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值:None,不使用缓存。
|
||||
- **extra_metadata** (bool, 可选) - 用于指定是否额外输出一个数据列用于表示图片元信息。如果为True,则将额外输出一个名为 `[_meta-filename, dtype=string]` 的数据列,默认值:False。
|
||||
|
||||
根据不同 `task` 参数设置,生成数据集具有不同的输出列:
|
||||
[表1] 根据不同 `task` 参数设置,生成数据集具有不同的输出列:
|
||||
|
||||
- `task` = 'Detection', 输出列: `[image, dtype=uint8]` , `[bbox, dtype=float32]` , `[category_id, dtype=uint32]` , `[iscrowd, dtype=uint32]` 。
|
||||
- `task` = 'Stuff', 输出列: `[image, dtype=uint8]` , `[segmentation,dtype=float32]` , `[iscrowd,dtype=uint32]` 。
|
||||
- `task` = 'Keypoint', 输出列: `[image, dtype=uint8]` , `[keypoints, dtype=float32]` , `[num_keypoints, dtype=uint32]` 。
|
||||
- `task` = 'Panoptic', 输出列: `[image, dtype=uint8]` , `[bbox, dtype=float32]` , `[category_id, dtype=uint32]` , `[iscrowd, dtype=uint32]` , `[area, dtype=uint32]` 。
|
||||
+-------------------------+----------------------------------------------+
|
||||
| `task` | 输出列 |
|
||||
+=========================+==============================================+
|
||||
| Detection | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [bbox, dtype=float32] |
|
||||
| | |
|
||||
| | [category_id, dtype=uint32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Stuff | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [segmentation, dtype=float32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Keypoint | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [keypoints, dtype=float32] |
|
||||
| | |
|
||||
| | [num_keypoints, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Panoptic | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [bbox, dtype=float32] |
|
||||
| | |
|
||||
| | [category_id, dtype=uint32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
| | |
|
||||
| | [area, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含任何数据文件。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **RuntimeError** - 解析 `annotation_file` 指定的JSON文件失败。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `task` 参数取值不为 `Detection` 、 `Stuff` 、 `Panoptic` 或 `Keypoint` 。
|
||||
- **ValueError** - `annotation_file` 参数对应的文件不存在。
|
||||
- **ValueError** - `dataset_dir` 参数路径不存在。
|
||||
|
@ -47,7 +76,7 @@
|
|||
- CocoDataset的 `sampler` 参数不支持指定PKSampler。
|
||||
- 此数据集可以指定参数 `sampler` ,但参数 `sampler` 和参数 `shuffle` 的行为是互斥的。下表展示了几种合法的输入参数组合及预期的行为。
|
||||
|
||||
.. list-table:: 配置 `sampler` 和 `shuffle` 的不同组合得到的预期排序结果
|
||||
.. list-table:: [表2] 配置 `sampler` 和 `shuffle` 的不同组合得到的预期排序结果
|
||||
:widths: 25 25 50
|
||||
:header-rows: 1
|
||||
|
||||
|
|
|
@ -176,7 +176,7 @@
|
|||
- **python_multiprocessing** (bool, 可选) - 启用Python多进程模式加速map操作。当传入的 `operations` 计算量很大时,开启此选项可能会有较好效果。默认值:False。
|
||||
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值:None,不使用缓存。
|
||||
- **callbacks** (DSCallback, list[DSCallback], 可选) - 要调用的Dataset回调函数列表。默认值:None。
|
||||
- **max_rowsize** (int, 可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,仅当 `python_multiprocessing` 为True时,该选项有效。默认值:16,数量级为MB。
|
||||
- **max_rowsize** (int, 可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,仅当 `python_multiprocessing` 为True时,该选项有效。默认值:16,单位为MB。
|
||||
- **offload** (bool, 可选) - 是否进行异构硬件加速,详情请阅读 `数据准备异构加速 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_dataset_offload.html>`_ 。默认值:None。
|
||||
|
||||
.. note::
|
||||
|
|
|
@ -12,8 +12,8 @@
|
|||
|
||||
**异常:**
|
||||
|
||||
- **TypeError:** `apply_func` 的类型不是函数。
|
||||
- **TypeError:** `apply_func` 未返回Dataset对象。
|
||||
- **TypeError** - `apply_func` 的类型不是函数。
|
||||
- **TypeError** - `apply_func` 未返回Dataset对象。
|
||||
|
||||
.. py:method:: batch(batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None, input_columns=None, output_columns=None, column_order=None, pad_info=None, python_multiprocessing=False, max_rowsize=16)
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
|||
- **pad_info** (dict, 可选) - 对给定数据列进行填充。通过传入dict来指定列信息与填充信息,例如 `pad_info={"col1":([224,224],0)}` ,
|
||||
则将列名为"col1"的数据列扩充到shape为[224,224]的Tensor,缺失的值使用0填充。默认值:None,不填充。
|
||||
- **python_multiprocessing** (bool, 可选) - 启动Python多进程模式并行执行 `per_batch_map` 。如果 `per_batch_map` 的计算量很大,此选项可能会很有用。默认值:False,不启用多进程。
|
||||
- **max_rowsize** (int, 可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,仅当 `python_multiprocessing` 为True时,该选项有效。默认值:16,单位为MB。
|
||||
|
||||
**返回:**
|
||||
|
||||
|
|
|
@ -25,17 +25,17 @@
|
|||
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数,默认值:None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
|
||||
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号,默认值:None。只有当指定了 `num_shards` 时才能指定此参数。
|
||||
- **python_multiprocessing** (bool,可选) - 启用Python多进程模式加速运算,默认值:True。当传入 `source` 的Python对象的计算量很大时,开启此选项可能会有较好效果。
|
||||
- **max_rowsize** (int,可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,默认值:6,数量级为MB。仅当参数 `python_multiprocessing` 设为True时,此参数才会生效。
|
||||
- **max_rowsize** (int,可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,默认值:6,单位为MB。仅当参数 `python_multiprocessing` 设为True时,此参数才会生效。
|
||||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - Python对象 `source` 在执行期间引发异常。
|
||||
- **RuntimeError** - `column_names` 参数指定的列名数量与 `source` 参数输出的数据数量不匹配。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过最大线程数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过最大线程数。
|
||||
- **ValueError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **ValueError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **ValueError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **ValueError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. note::
|
||||
|
|
|
@ -20,6 +20,18 @@ mindspore.dataset.GraphData
|
|||
- **num_client** (int, 可选) - 期望连接到服务器的最大客户端数。服务器将根据该参数分配资源。该参数仅在工作模式设置为 'server' 时有效,默认值:1。
|
||||
- **auto_shutdown** (bool, 可选) - 当工作模式设置为 'server' 时有效。当连接的客户端数量达到 `num_client` ,且没有客户端正在连接时,服务器将自动退出,默认值:True。
|
||||
|
||||
**异常:**
|
||||
|
||||
- **ValueError** - `dataset_file` 路径下数据文件不存在或无效。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `working_mode` 参数取值不为'local', 'client' 或 'server'。
|
||||
- **TypeError** - `hostname` 参数类型错误。
|
||||
- **ValueError** - `port` 参数不在范围[1024, 65535]内。
|
||||
- **ValueError** - `num_client` 参数不在范围[1, 255]内。
|
||||
|
||||
**支持平台:**
|
||||
|
||||
- ``CPU``
|
||||
|
||||
.. py:method:: get_all_edges(edge_type)
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ mindspore.dataset.ImageFolderDataset
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_dir` 不包含任何数据文件。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 路径下不包含任何数据文件。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过最大线程数。
|
||||
- **ValueError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过最大线程数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
|
|
|
@ -22,13 +22,14 @@ mindspore.dataset.MnistDataset
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError:** `dataset_dir` 路径下不包含数据文件。
|
||||
- **RuntimeError:** `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError:** 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError:** 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError:** `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含数据文件。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `usage` 参数取值不为'train'、'test'或'all'。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. note:: 此数据集可以指定参数 `sampler` ,但参数 `sampler` 和参数 `shuffle` 的行为是互斥的。下表展示了几种合法的输入参数组合及预期的行为。
|
||||
|
||||
|
|
|
@ -51,12 +51,12 @@ mindspore.dataset.NumpySlicesDataset
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `column_names` 列表的长度与数据的输出列表长度不匹配。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **RuntimeError:** 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError:** 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError:** 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError:** `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - 同时指定了 `sampler` 和 `shuffle` 参数。
|
||||
- **ValueError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **ValueError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **ValueError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. include:: mindspore.dataset.Dataset.add_sampler.rst
|
||||
|
||||
|
|
|
@ -31,8 +31,8 @@ mindspore.dataset.TFRecordDataset
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
|
|
|
@ -23,10 +23,11 @@
|
|||
|
||||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - `dataset_files` 参数所指向的文件无效或不存在。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `shard_id` 参数值错误(小于0或者大于等于 `num_shards` )。
|
||||
|
||||
.. include:: mindspore.dataset.Dataset.rst
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ mindspore.dataset.VOCDataset
|
|||
**异常:**
|
||||
|
||||
- **RuntimeError** - `dataset_dir` 路径下不包含任何数据文件。
|
||||
- **RuntimeError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **RuntimeError** - 读取的xml文件格式异常或无效。
|
||||
- **RuntimeError** - 读取的xml文件缺失 `object` 属性。
|
||||
- **RuntimeError** - 读取的xml文件缺失 `bndbox` 属性。
|
||||
|
@ -41,6 +40,7 @@ mindspore.dataset.VOCDataset
|
|||
- **RuntimeError** - 同时指定了 `sampler` 和 `num_shards` 参数。
|
||||
- **RuntimeError** - 指定了 `num_shards` 参数,但是未指定 `shard_id` 参数。
|
||||
- **RuntimeError** - 指定了 `shard_id` 参数,但是未指定 `num_shards` 参数。
|
||||
- **ValueError** - `num_parallel_workers` 参数超过系统最大线程数。
|
||||
- **ValueError** - 指定的任务不为'Segmentation'或'Detection'。
|
||||
- **ValueError** - 指定任务为'Segmentation'时, `class_indexing` 参数不为None。
|
||||
- **ValueError** - 与 `usage` 参数相关的txt文件不存在。
|
||||
|
|
|
@ -857,13 +857,10 @@ class Dataset:
|
|||
"""
|
||||
Filter dataset by prediction.
|
||||
|
||||
Note:
|
||||
If input_columns not provided or provided with empty, all columns will be used.
|
||||
|
||||
Args:
|
||||
predicate (callable): Python callable which returns a boolean value. If False then filter the element.
|
||||
input_columns (Union[str, list[str]], optional): List of names of the input columns, when
|
||||
default=None, the predicate will be applied on all columns in the dataset.
|
||||
input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
|
||||
or provided with None, the predicate will be applied on all columns in the dataset (default=None).
|
||||
num_parallel_workers (int, optional): Number of workers to process the dataset
|
||||
in parallel (default=None).
|
||||
|
||||
|
|
|
@ -44,9 +44,9 @@ class CMUArcticDataset(MappableDataset, AudioBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
name (str, optional): Part of this dataset, can be ""aew", "ahw", "aup", "awb", "axb", "bdl",
|
||||
"clb", "eey", "fem", "gka", "jmk", "ksp", "ljm", "lnh", "rms", "rxr", "slp" or "slt"
|
||||
(default=None, equal "aew").
|
||||
name (str, optional): Part of this dataset, can be 'aew', 'ahw', 'aup', 'awb', 'axb', 'bdl',
|
||||
'clb', 'eey', 'fem', 'gka', 'jmk', 'ksp', 'ljm', 'lnh', 'rms', 'rxr', 'slp' or 'slt'
|
||||
(default=None, equal 'aew').
|
||||
num_samples (int, optional): The number of audio to be included in the dataset
|
||||
(default=None, will read all audio).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -180,7 +180,7 @@ class GTZANDataset(MappableDataset, AudioBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be "train", "valid", "test" or "all"
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'valid', 'test' or 'all'
|
||||
(default=None, all samples).
|
||||
num_samples (int, optional): The number of audio to be included in the dataset
|
||||
(default=None, will read all audio).
|
||||
|
@ -315,8 +315,8 @@ class LibriTTSDataset(MappableDataset, AudioBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Part of this dataset, can be ""dev-clean", "dev-other", "test-clean", "test-other",
|
||||
"train-clean-100", "train-clean-360", "train-other-500", or "all" (default=None, equal "all").
|
||||
usage (str, optional): Part of this dataset, can be 'dev-clean', 'dev-other', 'test-clean', 'test-other',
|
||||
'train-clean-100', 'train-clean-360', 'train-other-500', or 'all' (default=None, equal 'all').
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all audio).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -598,9 +598,9 @@ class SpeechCommandsDataset(MappableDataset, AudioBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train`
|
||||
will read from 84,843 samples, `test` will read from 11,005 samples, `valid` will read from 9,981
|
||||
test samples and `all` will read from all 105,829 samples (default=None, will read all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train'
|
||||
will read from 84,843 samples, 'test' will read from 11,005 samples, 'valid' will read from 9,981
|
||||
test samples and 'all' will read from all 105,829 samples (default=None, will read all samples).
|
||||
num_samples (int, optional): The number of samples to be included in the dataset
|
||||
(default=None, will read all samples).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -727,14 +727,14 @@ class TedliumDataset(MappableDataset, AudioBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
release (str): Release of the dataset, can be "release1", "release2", "release3".
|
||||
release (str): Release of the dataset, can be 'release1', 'release2', 'release3'.
|
||||
usage (str, optional): Usage of this dataset.
|
||||
For release1 or release2, can be `train`, `test`, `dev` or `all`.
|
||||
`train` will read from train samples,
|
||||
`test` will read from test samples,
|
||||
`dev` will read from dev samples,
|
||||
`all` will read from all samples.
|
||||
For release3, can only be "all", it will read from data samples (default=None, all samples).
|
||||
For release1 or release2, can be 'train', 'test', 'dev' or 'all'.
|
||||
'train' will read from train samples,
|
||||
'test' will read from test samples,
|
||||
'dev' will read from dev samples,
|
||||
'all' will read from all samples.
|
||||
For release3, can only be 'all', it will read from data samples (default=None, all samples).
|
||||
extensions (str): Extensions of the SPH files, only '.sph' is valid.
|
||||
(default=None, ".sph").
|
||||
num_samples (int, optional): The number of audio samples to be included in the dataset
|
||||
|
|
|
@ -46,7 +46,7 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include `train`, `test` and `all` (default=None, all samples).
|
||||
usage (str, optional): Acceptable usages include 'train', 'test' and 'all' (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
(default=None, number set in the config).
|
||||
|
@ -134,13 +134,13 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset
|
||||
or the Amazon Review Full dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` (default= `all`).
|
||||
For Polarity dataset, `train` will read from 3,600,000 train samples,
|
||||
`test` will read from 400,000 test samples,
|
||||
`all` will read from all 4,000,000 samples.
|
||||
For Full dataset, `train` will read from 3,000,000 train samples,
|
||||
`test` will read from 650,000 test samples,
|
||||
`all` will read from all 3,650,000 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' (default= 'all').
|
||||
For Polarity dataset, 'train' will read from 3,600,000 train samples,
|
||||
'test' will read from 400,000 test samples,
|
||||
'all' will read from all 4,000,000 samples.
|
||||
For Full dataset, 'train' will read from 3,000,000 train samples,
|
||||
'test' will read from 650,000 test samples,
|
||||
'all' will read from all 3,650,000 samples (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to be read (default=None, reads the full dataset).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -221,14 +221,14 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|||
class CLUEDataset(SourceDataset, TextBaseDataset):
|
||||
"""
|
||||
A source dataset that reads and parses CLUE datasets.
|
||||
Supported CLUE classification tasks: `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`.
|
||||
Supported CLUE classification tasks: 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
|
||||
|
||||
Args:
|
||||
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for
|
||||
a pattern of files. The list will be sorted in a lexicographical order.
|
||||
task (str, optional): The kind of task, one of `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`.
|
||||
task (str, optional): The kind of task, one of 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
|
||||
(default=AFQMC).
|
||||
usage (str, optional): Specify the `train`, `test` or `eval` part of dataset (default="train").
|
||||
usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset (default='train').
|
||||
num_samples (int, optional): The number of samples to be included in the dataset
|
||||
(default=None, will include all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -250,63 +250,147 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|||
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
|
||||
(default=None, which means no cache is used).
|
||||
|
||||
Note:
|
||||
The generated dataset with different task setting has different output columns:
|
||||
The generated dataset with different task setting has different output columns:
|
||||
|
||||
- task = :py:obj:`AFQMC`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \
|
||||
:py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \
|
||||
:py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
|
||||
- task = :py:obj:`TNEWS`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \
|
||||
:py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, \
|
||||
:py:obj:`[keywords, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[keywords, dtype=string]`, :py:obj:`[sentence, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \
|
||||
:py:obj:`[label_desc, dtype=string]`, :py:obj:`[sentence, dtype=string]`,\
|
||||
:py:obj:`[keywords, dtype=string]`.
|
||||
|
||||
- task = :py:obj:`IFLYTEK`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \
|
||||
:py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[sentence, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \
|
||||
:py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`.
|
||||
|
||||
- task = :py:obj:`CMNLI`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \
|
||||
:py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \
|
||||
:py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
|
||||
- task = :py:obj:`WSC`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[span1_index, dtype=uint32]`, \
|
||||
:py:obj:`[span2_index, dtype=uint32]`, :py:obj:`[span1_text, dtype=string]`, \
|
||||
:py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint32]`, \
|
||||
:py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[span1_index, dtype=uint32]`, \
|
||||
:py:obj:`[span2_index, dtype=uint32]`, :py:obj:`[span1_text, dtype=string]`, \
|
||||
:py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint32]`, :py:obj:`[text, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[span1_index, dtype=uint32]`, \
|
||||
:py:obj:`[span2_index, dtype=uint32]`, :py:obj:`[span1_text, dtype=string]`, \
|
||||
:py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint32]`, \
|
||||
:py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
|
||||
- task = :py:obj:`CSL`
|
||||
- usage = :py:obj:`train`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
- usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`.
|
||||
- usage = :py:obj:`eval`, output columns: :py:obj:`[id, dtype=uint32]`, \
|
||||
:py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`.
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| `task` | `usage` | Output column |
|
||||
+=========================+==============================+=============================+
|
||||
| AFQMC | train | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| TNEWS | train | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| | | |
|
||||
| | | [keywords, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| IFLYTEK | train | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [label, dtype=string] |
|
||||
| | | |
|
||||
| | | [label_des, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| CMNLI | train | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [sentence1, dtype=string] |
|
||||
| | | |
|
||||
| | | [sentence2, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| WSC | train | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [span1_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span2_index, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [span1_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [span2_text, dtype=string] |
|
||||
| | | |
|
||||
| | | [idx, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [text, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
| CSL | train | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | test | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| +------------------------------+-----------------------------+
|
||||
| | eval | [id, dtype=uint32] |
|
||||
| | | |
|
||||
| | | [abst, dtype=string] |
|
||||
| | | |
|
||||
| | | [keyword, dtype=string] |
|
||||
| | | |
|
||||
| | | [label, dtype=string] |
|
||||
+-------------------------+------------------------------+-----------------------------+
|
||||
|
||||
Raises:
|
||||
ValueError: If dataset_files are not valid or do not exist.
|
||||
|
@ -379,9 +463,9 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test`, or `all`. `train` will read from
|
||||
8936 train samples, `test` will read from 2,012 test samples,
|
||||
`all` will read from all 10,948 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test', or 'all'. 'train' will read from
|
||||
8,936 train samples, 'test' will read from 2,012 test samples,
|
||||
'all' will read from all 10,948 samples (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -437,10 +521,10 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`.
|
||||
`train` will read from 560,000 train samples,
|
||||
`test` will read from 70,000 test samples,
|
||||
`all` will read from all 630,000 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
||||
'train' will read from 560,000 train samples,
|
||||
'test' will read from 70,000 test samples,
|
||||
'all' will read from all 630,000 samples (default=None, all samples).
|
||||
num_samples (int, optional): The number of samples to be included in the dataset
|
||||
(default=None, will include all text).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -608,7 +692,7 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'
|
||||
(default=None, will read all samples).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all samples).
|
||||
|
@ -749,16 +833,16 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples).
|
||||
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all' (default=None, all samples).
|
||||
language_pair (sequence, optional): Sequence containing source and target language, supported values are
|
||||
(`en`, `fr`), ("en", "de"), ("en", "cs"), ("en", "ar"), ("fr", "en"), ("de", "en"), ("cs", "en"),
|
||||
("ar", "en") (default=("de", "en")).
|
||||
('en', 'fr'), ('en', 'de'), ('en', 'cs'), ('en', 'ar'), ('fr', 'en'), ('de', 'en'), ('cs', 'en'),
|
||||
('ar', 'en') (default=('de', 'en')).
|
||||
valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set
|
||||
of valid_set type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013"
|
||||
and "tst2014" (default="tst2013").
|
||||
of valid_set type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013'
|
||||
and 'tst2014' (default='tst2013').
|
||||
test_set (str, optional): A string to identify test set, when usage is test or all, the test set of test_set
|
||||
type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013" and "tst2014"
|
||||
(default="tst2014").
|
||||
type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013' and 'tst2014'
|
||||
(default='tst2014').
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -875,11 +959,11 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples).
|
||||
language_pair (list, optional): List containing src and tgt language, supported values are ("en", "nl"),
|
||||
("en", "de"), ("en", "it"), ("en", "ro"), ("nl", "en"), ("nl", "de"), ("nl", "it"), ("nl", "ro"),
|
||||
("de", "en"), ("de", "nl"), ("de", "it"), ("de", "ro"), ("it", "en"), ("it", "nl"), ("it", "de"),
|
||||
("it", "ro"), (`ro`, `en`), (`ro`, `nl`), (`ro`, `de`), (`ro`, `it`) (default=(`de`, `en`)).
|
||||
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all' (default=None, all samples).
|
||||
language_pair (list, optional): List containing src and tgt language, supported values are ('en', 'nl'),
|
||||
('en', 'de'), ('en', 'it'), ('en', 'ro'), ('nl', 'en'), ('nl', 'de'), ('nl', 'it'), ('nl', 'ro'),
|
||||
('de', 'en'), ('de', 'nl'), ('de', 'it'), ('de', 'ro'), ('it', 'en'), ('it', 'nl'), ('it', 'de'),
|
||||
('it', 'ro'), ('ro', 'en'), ('ro', 'nl'), ('ro', 'de'), ('ro', 'it') (default=('de', 'en')).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -971,7 +1055,7 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include `train`, `test, `valid` or `all` (default=`all`).
|
||||
usage (str, optional): Acceptable usages include 'train', 'test', 'valid' or 'all' (default='all').
|
||||
language_pair (str, optional): Acceptable language_pair include ['en', 'de'], ['de', 'en']
|
||||
(default=['en', 'de']).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
|
@ -998,7 +1082,7 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If usage is not "train", "test", "valid" or "all".
|
||||
RuntimeError: If usage is not 'train', 'test', 'valid' or 'all'.
|
||||
RuntimeError: If the length of language_pair is not equal to 2.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -1070,7 +1154,7 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include `train`, `test`, 'valid' and `all`.
|
||||
usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all'.
|
||||
'train' will read from 42,068 train samples of string type,
|
||||
'test' will read from 3,370 test samples of string type,
|
||||
'valid' will read from 3,761 test samples of string type,
|
||||
|
@ -1162,9 +1246,9 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` .
|
||||
`train` will read from 450,000 train samples, `test` will read from 60,000 test samples,
|
||||
`all` will read from all 510,000 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' .
|
||||
'train' will read from 450,000 train samples, 'test' will read from 60,000 test samples,
|
||||
'all' will read from all 510,000 samples (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, read all samples).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -1303,9 +1387,9 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` will read from
|
||||
12,543 train samples, `test` will read from 2,077 test samples, `valid` will read from 2,002 test samples,
|
||||
`all` will read from all 16,622 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read from
|
||||
12,543 train samples, 'test' will read from 2,077 test samples, 'valid' will read from 2,002 test samples,
|
||||
'all' will read from all 16,622 samples (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
@ -1359,7 +1443,7 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Acceptable usages include `train`, `test`, 'valid' and `all` (default=None, all samples).
|
||||
usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all' (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
(default=None, number set in the config).
|
||||
|
@ -1440,8 +1524,8 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read
|
||||
from 1,400,000 train samples, `test` will read from 60,000 test samples, `all` will read from
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read
|
||||
from 1,400,000 train samples, 'test' will read from 60,000 test samples, 'all' will read from
|
||||
all 1,460,000 samples (default=None, all samples).
|
||||
num_samples (int, optional): The number of samples to be included in the dataset
|
||||
(default=None, will include all text).
|
||||
|
@ -1533,11 +1617,11 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`.
|
||||
For Polarity, `train` will read from 560,000 train samples, `test` will read from 38,000 test samples,
|
||||
`all` will read from all 598,000 samples.
|
||||
For Full, `train` will read from 650,000 train samples, `test` will read from 50,000 test samples,
|
||||
`all` will read from all 700,000 samples (default=None, all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
||||
For Polarity, 'train' will read from 560,000 train samples, 'test' will read from 38,000 test samples,
|
||||
'all' will read from all 598,000 samples.
|
||||
For Full, 'train' will read from 650,000 train samples, 'test' will read from 50,000 test samples,
|
||||
'all' will read from all 700,000 samples (default=None, all samples).
|
||||
num_samples (int, optional): Number of samples (rows) to read (default=None, reads all samples).
|
||||
shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
|
||||
(default=Shuffle.GLOBAL).
|
||||
|
|
|
@ -830,11 +830,11 @@ class NumpySlicesDataset(GeneratorDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If len of column_names does not match output len of data.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
RuntimeError: If shard_id is specified but num_shards is None.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If sampler and shuffle are specified at the same time.
|
||||
ValueError: If sampler and sharding are specified at the same time.
|
||||
ValueError: If num_shards is specified but shard_id is None.
|
||||
ValueError: If shard_id is specified but num_shards is None.
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
|
||||
Examples:
|
||||
|
|
|
@ -122,9 +122,9 @@ class Caltech101Dataset(GeneratorDataset):
|
|||
dataset_dir (str): Path to the root directory that contains the dataset. This root directory contains two
|
||||
subdirectories, one is called 101_ObjectCategories, which stores images,
|
||||
and the other is called Annotations, which stores annotations.
|
||||
target_type (str, optional): Target of the image. If target_type is "category", return category represents
|
||||
the target class. If target_type is "annotation", return annotation.
|
||||
If target_type is "all", return category and annotation (default=None, means "category").
|
||||
target_type (str, optional): Target of the image. If target_type is 'category', return category represents
|
||||
the target class. If target_type is 'annotation', return annotation.
|
||||
If target_type is 'all', return category and annotation (default=None, means 'category').
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data (default=1).
|
||||
|
@ -413,8 +413,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|||
num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in
|
||||
the config).
|
||||
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None).
|
||||
usage (str, optional): Specify the `train`, `valid`, `test` part or `all` parts of dataset
|
||||
(default= `all`, will read all samples).
|
||||
usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset
|
||||
(default= 'all', will read all samples).
|
||||
sampler (Sampler, optional): Object used to choose samples from the dataset (default=None).
|
||||
decode (bool, optional): decode the images after reading (default=False).
|
||||
extensions (list[str], optional): List of file extensions to be included in the dataset (default=None).
|
||||
|
@ -430,7 +430,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If usage is not 'train', 'valid', 'test' or 'all'.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -574,8 +575,8 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 50,000
|
||||
train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
|
||||
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
|
||||
(default=None, all samples).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
|
@ -595,7 +596,8 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If usage is not 'train', 'test' or 'all'.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -704,8 +706,8 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 50,000
|
||||
train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
|
||||
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
|
||||
(default=None, all samples).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
|
@ -725,7 +727,8 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If usage is not 'train', 'test' or 'all'.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -829,10 +832,10 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str): Acceptable usages include `train`, `test`, `val` or `all` if quality_mode is `fine`
|
||||
otherwise `train`, `train_extra`, `val` or `all` (default= `train`).
|
||||
quality_mode (str): Acceptable quality_modes include `fine` or `coarse` (default= `fine`).
|
||||
task (str): Acceptable tasks include `instance`, `semantic`, `polygon` or `color` (default= `instance`).
|
||||
usage (str): Acceptable usages include 'train', 'test', 'val' or 'all' if quality_mode is 'fine'
|
||||
otherwise 'train', 'train_extra', 'val' or 'all' (default= 'train').
|
||||
quality_mode (str): Acceptable quality_modes include 'fine' or 'coarse' (default= 'fine').
|
||||
task (str): Acceptable tasks include 'instance', 'semantic', 'polygon' or 'color' (default= 'instance').
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -995,23 +998,11 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|||
CocoDataset supports five kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation,
|
||||
Panoptic Segmentation and Captioning of 2017 Train/Val/Test dataset.
|
||||
|
||||
The generated dataset with different task setting has different output columns:
|
||||
|
||||
- task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \
|
||||
:py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`.
|
||||
- task = :py:obj:`Stuff`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[segmentation,dtype=float32]`, \
|
||||
:py:obj:`[iscrowd,dtype=uint32]`.
|
||||
- task = :py:obj:`Keypoint`, output columns: :py:obj:`[image, dtype=uint8]`, \
|
||||
:py:obj:`[keypoints, dtype=float32]`, :py:obj:`[num_keypoints, dtype=uint32]`.
|
||||
- task = :py:obj:`Panoptic`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \
|
||||
:py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`, :py:obj:`[area, dtype=uint32]`.
|
||||
- task = :py:obj:`Captioning`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[captions, dtype=string]`.
|
||||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
annotation_file (str): Path to the annotation JSON file.
|
||||
task (str, optional): Set the task type for reading COCO data. Supported task types:
|
||||
`Detection`, `Stuff`, `Panoptic`, `Keypoint` and `Captioning` (default=`Detection`).
|
||||
'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning' (default='Detection').
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -1031,15 +1022,51 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|||
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
|
||||
output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False).
|
||||
|
||||
The generated dataset with different task setting has different output columns:
|
||||
|
||||
+-------------------------+----------------------------------------------+
|
||||
| `task` | Output column |
|
||||
+=========================+==============================================+
|
||||
| Detection | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [bbox, dtype=float32] |
|
||||
| | |
|
||||
| | [category_id, dtype=uint32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Stuff | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [segmentation, dtype=float32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Keypoint | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [keypoints, dtype=float32] |
|
||||
| | |
|
||||
| | [num_keypoints, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
| Panoptic | [image, dtype=uint8] |
|
||||
| | |
|
||||
| | [bbox, dtype=float32] |
|
||||
| | |
|
||||
| | [category_id, dtype=uint32] |
|
||||
| | |
|
||||
| | [iscrowd, dtype=uint32] |
|
||||
| | |
|
||||
| | [area, dtype=uint32] |
|
||||
+-------------------------+----------------------------------------------+
|
||||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
RuntimeError: If shard_id is specified but num_shards is None.
|
||||
RuntimeError: If parse JSON file failed.
|
||||
ValueError: If task is not in [`Detection`, `Stuff`, `Panoptic`, `Keypoint`, `Captioning`].
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If task is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'].
|
||||
ValueError: If annotation_file is not exist.
|
||||
ValueError: If dataset_dir is not exist.
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
|
@ -1215,13 +1242,13 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str): Acceptable usages include `train`, `valid` or `all` (default= `train`).
|
||||
downgrade (str): Acceptable downgrades include `bicubic`, `unknown`, `mild`, `difficult` or
|
||||
`wild` (default= `bicubic`).
|
||||
usage (str): Acceptable usages include 'train', 'valid' or 'all' (default= 'train').
|
||||
downgrade (str): Acceptable downgrades include 'bicubic', 'unknown', 'mild', 'difficult' or
|
||||
'wild' (default= 'bicubic').
|
||||
scale (int): Acceptable scales include 2, 3, 4 or 8 (default=2).
|
||||
When `downgrade` is `bicubic`, scale can be 2, 3, 4, 8.
|
||||
When `downgrade` is `unknown`, scale can only be 2, 3, 4.
|
||||
When `downgrade` is `mild`, `difficult` or `wild`, scale can only be 4.
|
||||
When `downgrade` is 'bicubic', scale can be 2, 3, 4, 8.
|
||||
When `downgrade` is 'unknown', scale can only be 2, 3, 4.
|
||||
When `downgrade` is 'mild', 'difficult' or 'wild', scale can only be 4.
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -1250,8 +1277,8 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|||
ValueError: If usage is invalid.
|
||||
ValueError: If downgrade is invalid.
|
||||
ValueError: If scale is invalid.
|
||||
ValueError: If scale equal to 8 and downgrade not equal to `bicubic`.
|
||||
ValueError: If downgrade in [`mild`, `difficult`, `wild`] and scale not equal to 4.
|
||||
ValueError: If scale equal to 8 and downgrade not equal to 'bicubic'.
|
||||
ValueError: If downgrade in ['mild', 'difficult', 'wild'] and scale not equal to 4.
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
|
||||
Note:
|
||||
|
@ -1402,9 +1429,9 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
name (str): Name of splits for this dataset, can be "byclass", "bymerge", "balanced", "letters", "digits"
|
||||
or "mnist".
|
||||
usage (str, optional): Usage of this dataset, can be "train", "test" or "all".
|
||||
name (str): Name of splits for this dataset, can be 'byclass', 'bymerge', 'balanced', 'letters', 'digits'
|
||||
or 'mnist'.
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
||||
(default=None, will read all samples).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
|
@ -1623,8 +1650,8 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read from 60,000
|
||||
train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples.
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 60,000
|
||||
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
|
||||
(default=None, will read all samples)
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
|
@ -2176,7 +2203,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -2280,8 +2307,8 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000
|
||||
train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples.
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
|
||||
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
|
||||
(default=None, will read all samples)
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
|
@ -2401,7 +2428,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_file (str): File to be read.
|
||||
usage (str, optional): Acceptable usages include `train`, `eval` and `inference` (default= `train`).
|
||||
usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference' (default= 'train').
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, will include all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -2424,7 +2451,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_files are not valid or do not exist.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If sampler and shuffle are specified at the same time.
|
||||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
|
@ -2520,8 +2547,8 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000
|
||||
train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples.
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
|
||||
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
|
||||
(default=None, will read all samples)
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
|
@ -2645,11 +2672,11 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|||
name (str): Name of the dataset to load,
|
||||
should be one of 'notredame', 'yosemite', 'liberty', 'notredame_harris',
|
||||
'yosemite_harris' or 'liberty_harris'.
|
||||
usage (str, optional): Usage of the dataset, can be `train` or `test` (Default=None, will be set to 'train').
|
||||
When usage is `train`, number of samples for each `name` is
|
||||
usage (str, optional): Usage of the dataset, can be 'train' or 'test' (Default=None, will be set to 'train').
|
||||
When usage is 'train', number of samples for each `name` is
|
||||
{'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, 'liberty_harris': 379587,
|
||||
'yosemite_harris': 450912, 'notredame_harris': 325295}.
|
||||
When usage is `test`, will read 100,000 samples for testing.
|
||||
When usage is 'test', will read 100,000 samples for testing.
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -2794,7 +2821,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train-standard`, `train-challenge` or `val`
|
||||
usage (str, optional): Usage of this dataset, can be 'train-standard', 'train-challenge' or 'val'
|
||||
(default=None, will be set to 'train-standard').
|
||||
small (bool, optional): Use 256 * 256 images (True) or high resolution images (False) (default=False).
|
||||
decode (bool, optional): Decode the images after reading (default=True).
|
||||
|
@ -2936,8 +2963,8 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test`, `test10k`, `test50k`, `nist`
|
||||
or `all` (default=None, will read all samples).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test', 'test10k', 'test50k', 'nist'
|
||||
or 'all' (default=None, will read all samples).
|
||||
compat (bool, optional): Whether the label for each example is class number (compat=True) or the full QMNIST
|
||||
information (compat=False) (default=True).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
|
@ -3158,13 +3185,13 @@ class SBDataset(GeneratorDataset):
|
|||
|
||||
The generated dataset has two columns: :py:obj:`[image, task]`.
|
||||
The tensor of column :py:obj:`image` is of the uint8 type.
|
||||
The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is `Boundaries` otherwise
|
||||
The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is 'Boundaries' otherwise
|
||||
contains 1 image of the uint8 type.
|
||||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
task (str, optional): Acceptable tasks include `Boundaries` or `Segmentation` (default= `Boundaries`).
|
||||
usage (str, optional): Acceptable usages include `train`, `val`, `train_noval` and `all` (default= `all`).
|
||||
task (str, optional): Acceptable tasks include 'Boundaries' or 'Segmentation' (default= 'Boundaries').
|
||||
usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all' (default= 'all').
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -3188,8 +3215,8 @@ class SBDataset(GeneratorDataset):
|
|||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
RuntimeError: If shard_id is specified but num_shards is None.
|
||||
ValueError: If dataset_dir is not exist.
|
||||
ValueError: If task is not in [`Boundaries`, `Segmentation`].
|
||||
ValueError: If usage is not in [`train`, `val`, `train_noval`, `all`].
|
||||
ValueError: If task is not in ['Boundaries', 'Segmentation'].
|
||||
ValueError: If usage is not in ['train', 'val', 'train_noval', 'all'].
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
|
||||
Note:
|
||||
|
@ -3528,11 +3555,11 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be "train", "test",
|
||||
"unlabeled", "train+unlabeled" or "all" . "train" will read from 5,000
|
||||
train samples, "test" will read from 8,000 test samples,
|
||||
"unlabeled" will read from all 100,000 samples, and "train+unlabeled"
|
||||
will read from 105000 samples, "all" will read all the samples
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test',
|
||||
'unlabeled', 'train+unlabeled' or 'all' . 'train' will read from 5,000
|
||||
train samples, 'test' will read from 8,000 test samples,
|
||||
'unlabeled' will read from all 100,000 samples, and 'train+unlabeled'
|
||||
will read from 105000 samples, 'all' will read all the samples
|
||||
(default=None, all samples).
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
|
@ -3812,8 +3839,8 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be "train", "test" or "all". "train" will read from 7,291
|
||||
train samples, "test" will read from 2,007 test samples, "all" will read from all 9,298 samples.
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 7,291
|
||||
train samples, 'test' will read from 2,007 test samples, 'all' will read from all 9,298 samples.
|
||||
(default=None, will read all samples)
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
|
@ -3912,14 +3939,14 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
task (str, optional): Set the task type of reading voc data, now only support `Segmentation` or `Detection`
|
||||
(default= `Segmentation`).
|
||||
usage (str, optional): Set the task type of ImageSets(default= `train`). If task is `Segmentation`, image and
|
||||
annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is `Detection`, image and
|
||||
task (str, optional): Set the task type of reading voc data, now only support 'Segmentation' or 'Detection'
|
||||
(default= 'Segmentation').
|
||||
usage (str, optional): Set the task type of ImageSets(default= 'train'). If task is 'Segmentation', image and
|
||||
annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is 'Detection', image and
|
||||
annotation list will be loaded in ./ImageSets/Main/usage + ".txt"; if task and usage are not set, image and
|
||||
annotation list will be loaded in ./ImageSets/Segmentation/train.txt as default.
|
||||
class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
|
||||
`Detection` task (default=None, the folder names will be sorted alphabetically and each
|
||||
'Detection' task (default=None, the folder names will be sorted alphabetically and each
|
||||
class will be given a unique index starting from 0).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
|
@ -3942,7 +3969,6 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Raises:
|
||||
RuntimeError: If dataset_dir does not contain data files.
|
||||
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
|
||||
RuntimeError: If xml of Annotations is an invalid format.
|
||||
RuntimeError: If xml of Annotations loss attribution of `object`.
|
||||
RuntimeError: If xml of Annotations loss attribution of `bndbox`.
|
||||
|
@ -3950,6 +3976,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|||
RuntimeError: If sampler and sharding are specified at the same time.
|
||||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
RuntimeError: If shard_id is specified but num_shards is None.
|
||||
ValueError: If num_parallel_workers exceeds the max thread numbers.
|
||||
ValueError: If task is not equal 'Segmentation' or 'Detection'.
|
||||
ValueError: If task equal 'Segmentation' but class_indexing is not None.
|
||||
ValueError: If txt related to mode is not exist.
|
||||
|
@ -4120,9 +4147,9 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` will read
|
||||
from 12,880 samples, `test` will read from 16,097 samples, `valid` will read from 3,226 test samples
|
||||
and `all` will read all `train` and `valid` samples (default=None, will be set to `all`).
|
||||
usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read
|
||||
from 12,880 samples, 'test' will read from 16,097 samples, 'valid' will read from 3,226 test samples
|
||||
and 'all' will read all 'train' and 'valid' samples (default=None, will be set to 'all').
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will read all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
@ -4147,7 +4174,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|||
RuntimeError: If num_shards is specified but shard_id is None.
|
||||
RuntimeError: If shard_id is specified but num_shards is None.
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
ValueError: If usage is not in [`train`, `test`, `valid`, `all`].
|
||||
ValueError: If usage is not in ['train', 'test', 'valid', 'all'].
|
||||
ValueError: If annotation_file is not exist.
|
||||
ValueError: If dataset_dir is not exist.
|
||||
ValueError: If shard_id is invalid (< 0 or >= num_shards).
|
||||
|
|
|
@ -104,7 +104,7 @@ class GraphData:
|
|||
|
||||
Raises:
|
||||
ValueError: If `dataset_file` does not exist or permission denied.
|
||||
TypeError: If `num_parallel_workers` exceeds the max thread numbers.
|
||||
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
||||
ValueError: If `working_mode` is not 'local', 'client' or 'server'.
|
||||
TypeError: If `hostname` is illegal.
|
||||
ValueError: If `port` is not in range [1024, 65535].
|
||||
|
|
Loading…
Reference in New Issue