fix: mindrecord page size comment

This commit is contained in:
jonyguo 2022-11-08 15:34:35 +08:00
parent 28fdfb512a
commit 4be1c74181
6 changed files with 23 additions and 10 deletions

View File

@ -3,7 +3,7 @@ mindspore.dataset.Dataset.batch
.. py:method:: mindspore.dataset.Dataset.batch(batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs)
将数据集中连续 `batch_size` 条数据合并为一个批处理数据其中batch成一个Tensor前可选择使用 `per_batch_map` 对样本进行处理
将数据集中连续 `batch_size` 条数据组合为一个批数据,并可通过可选参数 `per_batch_map` 指定组合前要进行的预处理操作。
`batch` 操作要求每列中的数据具有相同的shape。

View File

@ -3,7 +3,7 @@ mindspore.dataset.Dataset.padded_batch
.. py:method:: mindspore.dataset.Dataset.padded_batch(batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None)
将数据集中连续 `batch_size` 条数据合并为一个批处理数据其中batch成一个Tensor前可选择使用 `pad_info` 预先将样本补齐。
将数据集中连续 `batch_size` 条数据组合为一个批数据,并可通过可选参数 `pad_info` 预先将样本补齐。
`batch` 操作要求每列中的数据具有相同的shape。

View File

@ -235,6 +235,9 @@ Status DataQueueOp::SendDataToAscend() {
TensorRow curr_row;
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row));
first_fetch_flag_ = true;
MS_LOG(INFO) << "Begin to send data to device, channel name: " << channel_name_;
while (!curr_row.eof() && !is_break_loop) {
while (!curr_row.eoe() && !is_break_loop) {
RETURN_IF_NOT_OK(FilterMetadata(&curr_row));
@ -263,6 +266,7 @@ Status DataQueueOp::SendDataToAscend() {
batch_record_start = ProfilingTime::GetCurMilliSecond();
#endif
send_batch++;
MS_LOG(INFO) << "Have sent " << send_batch << " batch(es) to device, channel name: " << channel_name_;
#ifdef ENABLE_DUMP_IR
RETURN_IF_NOT_OK(md_channel_info_->RecordBatchQueue(ChildOpConnectorSize()));
RETURN_IF_NOT_OK(md_channel_info_->RecordPreprocessBatch(send_batch));
@ -543,6 +547,7 @@ Status DataQueueOp::PushDataToGPU() {
gpu_connector_->capacity(), gpu_connector_->size());
#endif
send_batch++;
MS_LOG(INFO) << "Have sent " << send_batch << " batch(es) to device, channel name: " << channel_name_;
#ifdef ENABLE_DUMP_IR
md_channel_info_->RecordBatchQueue(gpu_connector_->size());
md_channel_info_->RecordPreprocessBatch(send_batch);
@ -638,6 +643,9 @@ Status DataQueueOp::SendDataToGPU() {
first_fetch_flag_ = true;
int64_t num_buf = 0;
bool is_break_loop = false;
MS_LOG(INFO) << "Begin to send data to device, channel name: " << channel_name_;
while (!current_row.eof() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
while (!current_row.eoe() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
RETURN_IF_NOT_OK(FilterMetadata(&current_row));

View File

@ -246,8 +246,8 @@ Status ShardWriter::SetHeaderSize(const uint64_t &header_size) {
// header_size [16KB, 128MB]
CHECK_FAIL_RETURN_UNEXPECTED_MR(header_size >= kMinHeaderSize && header_size <= kMaxHeaderSize,
"Invalid data, header size: " + std::to_string(header_size) +
" should be in range [" + std::to_string(kMinHeaderSize) + "MB, " +
std::to_string(kMaxHeaderSize) + "MB].");
" should be in range [" + std::to_string(kMinHeaderSize) + " bytes, " +
std::to_string(kMaxHeaderSize) + " bytes].");
CHECK_FAIL_RETURN_UNEXPECTED_MR(
header_size % 4 == 0, "Invalid data, header size " + std::to_string(header_size) + " should be divided by four.");
header_size_ = header_size;
@ -258,7 +258,8 @@ Status ShardWriter::SetPageSize(const uint64_t &page_size) {
// PageSize [32KB, 256MB]
CHECK_FAIL_RETURN_UNEXPECTED_MR(page_size >= kMinPageSize && page_size <= kMaxPageSize,
"Invalid data, page size: " + std::to_string(page_size) + " should be in range [" +
std::to_string(kMinPageSize) + "MB, " + std::to_string(kMaxPageSize) + "MB].");
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
" bytes].");
CHECK_FAIL_RETURN_UNEXPECTED_MR(
page_size % 4 == 0, "Invalid data, page size " + std::to_string(page_size) + " should be divided by four.");
page_size_ = page_size;
@ -1135,7 +1136,9 @@ Status ShardWriter::SetRawDataSize(const std::vector<std::vector<uint8_t>> &bin_
CHECK_FAIL_RETURN_SYNTAX_ERROR_MR(*std::max_element(raw_data_size_.begin(), raw_data_size_.end()) <= page_size_,
"Invalid data, Page size: " + std::to_string(page_size_) +
" is too small to save a raw row. Please try to use the mindrecord api "
"'set_page_size(1<<25)' to enable 64MB page size.");
"'set_page_size(value)' to enable larger page size, and the value range is in [" +
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
" bytes].");
return Status::OK();
}
@ -1146,7 +1149,9 @@ Status ShardWriter::SetBlobDataSize(const std::vector<std::vector<uint8_t>> &blo
CHECK_FAIL_RETURN_SYNTAX_ERROR_MR(*std::max_element(blob_data_size_.begin(), blob_data_size_.end()) <= page_size_,
"Invalid data, Page size: " + std::to_string(page_size_) +
" is too small to save a blob row. Please try to use the mindrecord api "
"'set_page_size(1<<25)' to enable 64MB page size.");
"'set_page_size(value)' to enable larger page size, and the value range is in [" +
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
" bytes].");
return Status::OK();
}

View File

@ -1395,7 +1395,7 @@ class MelScale(AudioTensorOperation):
Args:
n_mels (int, optional): Number of mel filterbanks. Default: 128.
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
f_min (float, optional): Minimum frequency. Default: 0.
f_min (float, optional): Minimum frequency. Default: 0.0.
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
n_stft (int, optional): Number of bins in STFT. Default: 201.
norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType.NONE.
@ -1414,7 +1414,7 @@ class MelScale(AudioTensorOperation):
"""
@check_mel_scale
def __init__(self, n_mels=128, sample_rate=16000, f_min=0, f_max=None, n_stft=201, norm=NormType.NONE,
def __init__(self, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, n_stft=201, norm=NormType.NONE,
mel_type=MelType.HTK):
super().__init__()
self.n_mels = n_mels

View File

@ -322,7 +322,7 @@ class FileWriter:
Examples:
>>> from mindspore.mindrecord import FileWriter
>>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
>>> status = writer.set_page_size(1 << 26) # 128MB
>>> status = writer.set_page_size(1 << 26) # 64MB
"""
return self._writer.set_page_size(page_size)