forked from mindspore-Ecosystem/mindspore
fix the description of cache
This commit is contained in:
parent c617a07dff
commit 543b75f366
@@ -388,6 +388,13 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
  op->tree_->Print(ss, op);
  std::string ss_str = ss.str();

  // Filter out the Num workers field when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Num workers.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("\\[workers.*\\]"), "");

  // Filter out Number of rows when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Number of rows.*\n"), "");

  // Filter out the Operator control flags field when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Operator control flags.*\n"), "");
@@ -400,6 +407,8 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
  ss_str = std::regex_replace(ss_str, std::regex("Cache crc.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Server cache id.*\n"), "");

  MS_LOG(DEBUG) << "Printing the tree for generating crc:\n" << ss_str;

  uint32_t cache_crc = system::Crc32c::GetMaskCrc32cValue(ss_str.c_str(), ss_str.length());
  return cache_crc;
}
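The two hunks above are what produce the cache CRC: the operator tree is printed to a string, fields that can legitimately differ between otherwise identical pipelines (worker counts, row counts, operator control flags, and the cache fields themselves) are stripped with regular expressions, and the remainder is hashed. A rough Python sketch of the same idea, not taken from the patch; the tree_dump text is made up and plain zlib.crc32 stands in for the masked CRC32C used in the C++ code:

import re
import zlib

# Made-up serialized operator tree, similar in spirit to what
# DatasetOp::GenerateCRC prints before hashing.
tree_dump = (
    "MapOp\n"
    "Num workers: 8\n"
    "Number of rows: 44\n"
    "Operator control flags: 0x1\n"
    "Decode: true\n"
)

# Strip the volatile fields so the checksum reflects only the pipeline
# structure, mirroring the regex_replace calls in the hunks above.
for pattern in (r"Num workers.*\n", r"\[workers.*\]",
                r"Number of rows.*\n", r"Operator control flags.*\n"):
    tree_dump = re.sub(pattern, "", tree_dump)

# Plain CRC32 here; the C++ code uses a masked CRC32C instead.
print(zlib.crc32(tree_dump.encode()))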
@@ -434,7 +434,8 @@ class Dataset:
            parallel (default=None, the value from the config will be used).
            python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
                option could be beneficial if the python operation is computational heavy (default=False).
-           cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+           cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+               The cache feature is under development and is not recommended.

        Returns:
            MapDataset, dataset after mapping operation.
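For context, this is how the cache argument described above is wired in by the tests further down in this commit: a DatasetCache is created once and handed to map. A minimal sketch assuming the same values as the tests (session_id=1, size=0, spilling=True) and the test file's c_vision alias for the C++ vision transforms; the exact import path may differ between MindSpore versions:

import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision

# session_id must refer to an existing cache server session; size=0 means no
# explicit memory cap; spilling=True allows spilling to disk.
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

ds1 = ds.ImageFolderDatasetV2(dataset_dir="../data/dataset/testImageNetData/train/")
ds1 = ds1.map(input_columns=["image"], operations=c_vision.Decode(), cache=some_cache)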
@@ -1899,7 +1900,9 @@ class MapDataset(DatasetOp):
            in parallel (default=None).
        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
            option could be beneficial if the python operation is computational heavy (default=False).
-       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+           The cache feature is under development and is not recommended.

    Raises:
        ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified.
@@ -2089,6 +2092,7 @@ class RepeatDataset(DatasetOp):
        """
        return self.count


class SkipDataset(DatasetOp):
    """
    The result of applying Skip operator to the input Dataset.
@@ -2354,6 +2358,7 @@ class TransferDataset(DatasetOp):
    def stop_send(self):
        self.iterator.depipeline.StopSend()


class RangeDataset(MappableDataset):
    """
    A source dataset that reads and parses datasets stored on disk in a range.
@@ -2500,7 +2505,8 @@ class ImageFolderDatasetV2(MappableDataset):
            into (default=None).
        shard_id (int, optional): The shard ID within num_shards (default=None). This
            argument should be specified only when num_shards is also specified.
-       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+           The cache feature is under development and is not recommended.

    Raises:
        RuntimeError: If sampler and shuffle are specified at the same time.
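As a side note on the num_shards/shard_id pair documented in this hunk, a minimal sketch of sharded loading; the directory and shard counts are illustrative and not part of the patch:

import mindspore.dataset as ds

# Read shard 0 of 2; shard_id must lie in [0, num_shards).
ds_train = ds.ImageFolderDatasetV2(dataset_dir="../data/dataset/testImageNetData/train/",
                                   num_shards=2, shard_id=0)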
@@ -3296,7 +3302,8 @@ class TFRecordDataset(SourceDataset):
            argument should be specified only when num_shards is also specified.
        shard_equal_rows (bool): Get equal rows for all shards(default=False). If shard_equal_rows is false, number
            of rows of each shard may be not equal.
-       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+           The cache feature is under development and is not recommended.
    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.common.dtype as mstype
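shard_equal_rows, documented above, makes every shard return the same number of rows; left at the default False, shards may end up with different sizes. A small illustrative call, with a placeholder file list:

import mindspore.dataset as ds

# Placeholder file list; read shard 0 of 2 with equal rows per shard.
dataset_files = ["/path/to/data.tfrecord"]
data = ds.TFRecordDataset(dataset_files, num_shards=2, shard_id=0,
                          shard_equal_rows=True)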
@@ -3867,7 +3874,8 @@ class RandomDataset(SourceDataset):
        num_samples (int): number of samples to draw from the total. (default=None, which means all rows)
        num_parallel_workers (int, optional): number of workers to read the data
            (default=None, number set in the config).
-       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+       cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+           The cache feature is under development and is not recommended.
        shuffle (bool, optional): Whether or not to perform shuffle on the dataset
            (default=None, expected order behavior shown in the table).
        num_shards (int, optional): Number of shards that the dataset should be divided
@@ -5261,6 +5269,7 @@ class BuildVocabDataset(DatasetOp):

        return new_op


class BuildSentencePieceVocabDataset(DatasetOp):
    """
    Build a SentencePieceVocab from a dataset.
@@ -24,6 +24,7 @@ DATA_DIR = "../data/dataset/testImageNetData/train/"

GENERATE_GOLDEN = False


def test_cache_map_basic1():
    """
    Test mappable leaf with cache op right over the leaf
@@ -104,11 +105,36 @@ def test_cache_map_basic3():
    decode_op = c_vision.Decode()
    ds1 = ds1.repeat(4)
    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
-   print("ds1.dataset_size is ", ds1.get_dataset_size())
+   logger.info("ds1.dataset_size is ", ds1.get_dataset_size())

    num_iter = 0
    for _ in ds1.create_dict_iterator():
-       print("get data from dataset")
+       logger.info("get data from dataset")
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    assert num_iter == 8
    logger.info('test_cache_basic3 Ended.\n')


def test_cache_map_basic4():
    """
    Test different rows result in core dump
    """
    logger.info("Test cache basic 4")
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

    # This DATA_DIR only has 2 images in it
    ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache)
    decode_op = c_vision.Decode()
    ds1 = ds1.repeat(4)
    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
    shape = ds1.output_shapes()
    logger.info(shape)
    num_iter = 0
    for _ in ds1.create_dict_iterator():
        logger.info("get data from dataset")
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
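A note on the print-to-logger.info switch in the hunk above: assuming MindSpore's logger follows standard Python logging semantics, extra positional arguments are only %-interpolated into the message when it contains a placeholder, so a call like logger.info("ds1.dataset_size is ", ds1.get_dataset_size()) will not render the value the way the old print call did. A placeholder, or a pre-formatted string as used elsewhere in the test, makes the value appear:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

size = 8  # stand-in for ds1.get_dataset_size()
# Lazy %-formatting: the argument is substituted into the %s placeholder.
logger.info("ds1.dataset_size is %s", size)
# Equivalent eager formatting, matching the style already used in the test.
logger.info("Number of data in ds1: {} ".format(size))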
@@ -152,12 +178,15 @@ def test_cache_map_failure1():
    assert num_iter == 0
    logger.info('test_cache_failure1 Ended.\n')


if __name__ == '__main__':
    test_cache_map_basic1()
-   print("test_cache_map_basic1 success.")
+   logger.info("test_cache_map_basic1 success.")
    test_cache_map_basic2()
-   print("test_cache_map_basic2 success.")
+   logger.info("test_cache_map_basic2 success.")
    test_cache_map_basic3()
-   print("test_cache_map_basic3 success.")
+   logger.info("test_cache_map_basic3 success.")
    test_cache_map_basic4()
    logger.info("test_cache_map_basic3 success.")
    test_cache_map_failure1()
-   print("test_cache_map_failure1 success.")
+   logger.info("test_cache_map_failure1 success.")