fix: mindrecord page size comment
This commit is contained in:
parent
28fdfb512a
commit
4be1c74181
|
@ -3,7 +3,7 @@ mindspore.dataset.Dataset.batch
|
|||
|
||||
.. py:method:: mindspore.dataset.Dataset.batch(batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs)
|
||||
|
||||
将数据集中连续 `batch_size` 条数据合并为一个批处理数据,其中batch成一个Tensor前可选择使用 `per_batch_map` 对样本进行处理。
|
||||
将数据集中连续 `batch_size` 条数据组合为一个批数据,并可通过可选参数 `per_batch_map` 指定组合前要进行的预处理操作。
|
||||
|
||||
`batch` 操作要求每列中的数据具有相同的shape。
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ mindspore.dataset.Dataset.padded_batch
|
|||
|
||||
.. py:method:: mindspore.dataset.Dataset.padded_batch(batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None)
|
||||
|
||||
将数据集中连续 `batch_size` 条数据合并为一个批处理数据,其中batch成一个Tensor前可选择使用 `pad_info` 预先将样本补齐。
|
||||
将数据集中连续 `batch_size` 条数据组合为一个批数据,并可通过可选参数 `pad_info` 预先将样本补齐。
|
||||
|
||||
`batch` 操作要求每列中的数据具有相同的shape。
|
||||
|
||||
|
|
|
@ -235,6 +235,9 @@ Status DataQueueOp::SendDataToAscend() {
|
|||
TensorRow curr_row;
|
||||
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row));
|
||||
first_fetch_flag_ = true;
|
||||
|
||||
MS_LOG(INFO) << "Begin to send data to device, channel name: " << channel_name_;
|
||||
|
||||
while (!curr_row.eof() && !is_break_loop) {
|
||||
while (!curr_row.eoe() && !is_break_loop) {
|
||||
RETURN_IF_NOT_OK(FilterMetadata(&curr_row));
|
||||
|
@ -263,6 +266,7 @@ Status DataQueueOp::SendDataToAscend() {
|
|||
batch_record_start = ProfilingTime::GetCurMilliSecond();
|
||||
#endif
|
||||
send_batch++;
|
||||
MS_LOG(INFO) << "Have sent " << send_batch << " batch(es) to device, channel name: " << channel_name_;
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
RETURN_IF_NOT_OK(md_channel_info_->RecordBatchQueue(ChildOpConnectorSize()));
|
||||
RETURN_IF_NOT_OK(md_channel_info_->RecordPreprocessBatch(send_batch));
|
||||
|
@ -543,6 +547,7 @@ Status DataQueueOp::PushDataToGPU() {
|
|||
gpu_connector_->capacity(), gpu_connector_->size());
|
||||
#endif
|
||||
send_batch++;
|
||||
MS_LOG(INFO) << "Have sent " << send_batch << " batch(es) to device, channel name: " << channel_name_;
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
md_channel_info_->RecordBatchQueue(gpu_connector_->size());
|
||||
md_channel_info_->RecordPreprocessBatch(send_batch);
|
||||
|
@ -638,6 +643,9 @@ Status DataQueueOp::SendDataToGPU() {
|
|||
first_fetch_flag_ = true;
|
||||
int64_t num_buf = 0;
|
||||
bool is_break_loop = false;
|
||||
|
||||
MS_LOG(INFO) << "Begin to send data to device, channel name: " << channel_name_;
|
||||
|
||||
while (!current_row.eof() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
|
||||
while (!current_row.eoe() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
|
||||
RETURN_IF_NOT_OK(FilterMetadata(&current_row));
|
||||
|
|
|
@ -246,8 +246,8 @@ Status ShardWriter::SetHeaderSize(const uint64_t &header_size) {
|
|||
// header_size [16KB, 128MB]
|
||||
CHECK_FAIL_RETURN_UNEXPECTED_MR(header_size >= kMinHeaderSize && header_size <= kMaxHeaderSize,
|
||||
"Invalid data, header size: " + std::to_string(header_size) +
|
||||
" should be in range [" + std::to_string(kMinHeaderSize) + "MB, " +
|
||||
std::to_string(kMaxHeaderSize) + "MB].");
|
||||
" should be in range [" + std::to_string(kMinHeaderSize) + " bytes, " +
|
||||
std::to_string(kMaxHeaderSize) + " bytes].");
|
||||
CHECK_FAIL_RETURN_UNEXPECTED_MR(
|
||||
header_size % 4 == 0, "Invalid data, header size " + std::to_string(header_size) + " should be divided by four.");
|
||||
header_size_ = header_size;
|
||||
|
@ -258,7 +258,8 @@ Status ShardWriter::SetPageSize(const uint64_t &page_size) {
|
|||
// PageSize [32KB, 256MB]
|
||||
CHECK_FAIL_RETURN_UNEXPECTED_MR(page_size >= kMinPageSize && page_size <= kMaxPageSize,
|
||||
"Invalid data, page size: " + std::to_string(page_size) + " should be in range [" +
|
||||
std::to_string(kMinPageSize) + "MB, " + std::to_string(kMaxPageSize) + "MB].");
|
||||
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
|
||||
" bytes].");
|
||||
CHECK_FAIL_RETURN_UNEXPECTED_MR(
|
||||
page_size % 4 == 0, "Invalid data, page size " + std::to_string(page_size) + " should be divided by four.");
|
||||
page_size_ = page_size;
|
||||
|
@ -1135,7 +1136,9 @@ Status ShardWriter::SetRawDataSize(const std::vector<std::vector<uint8_t>> &bin_
|
|||
CHECK_FAIL_RETURN_SYNTAX_ERROR_MR(*std::max_element(raw_data_size_.begin(), raw_data_size_.end()) <= page_size_,
|
||||
"Invalid data, Page size: " + std::to_string(page_size_) +
|
||||
" is too small to save a raw row. Please try to use the mindrecord api "
|
||||
"'set_page_size(1<<25)' to enable 64MB page size.");
|
||||
"'set_page_size(value)' to enable larger page size, and the value range is in [" +
|
||||
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
|
||||
" bytes].");
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -1146,7 +1149,9 @@ Status ShardWriter::SetBlobDataSize(const std::vector<std::vector<uint8_t>> &blo
|
|||
CHECK_FAIL_RETURN_SYNTAX_ERROR_MR(*std::max_element(blob_data_size_.begin(), blob_data_size_.end()) <= page_size_,
|
||||
"Invalid data, Page size: " + std::to_string(page_size_) +
|
||||
" is too small to save a blob row. Please try to use the mindrecord api "
|
||||
"'set_page_size(1<<25)' to enable 64MB page size.");
|
||||
"'set_page_size(value)' to enable larger page size, and the value range is in [" +
|
||||
std::to_string(kMinPageSize) + " bytes, " + std::to_string(kMaxPageSize) +
|
||||
" bytes].");
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -1395,7 +1395,7 @@ class MelScale(AudioTensorOperation):
|
|||
Args:
|
||||
n_mels (int, optional): Number of mel filterbanks. Default: 128.
|
||||
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
|
||||
f_min (float, optional): Minimum frequency. Default: 0.
|
||||
f_min (float, optional): Minimum frequency. Default: 0.0.
|
||||
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
|
||||
n_stft (int, optional): Number of bins in STFT. Default: 201.
|
||||
norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType.NONE.
|
||||
|
@ -1414,7 +1414,7 @@ class MelScale(AudioTensorOperation):
|
|||
"""
|
||||
|
||||
@check_mel_scale
|
||||
def __init__(self, n_mels=128, sample_rate=16000, f_min=0, f_max=None, n_stft=201, norm=NormType.NONE,
|
||||
def __init__(self, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, n_stft=201, norm=NormType.NONE,
|
||||
mel_type=MelType.HTK):
|
||||
super().__init__()
|
||||
self.n_mels = n_mels
|
||||
|
|
|
@ -322,7 +322,7 @@ class FileWriter:
|
|||
Examples:
|
||||
>>> from mindspore.mindrecord import FileWriter
|
||||
>>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
|
||||
>>> status = writer.set_page_size(1 << 26) # 128MB
|
||||
>>> status = writer.set_page_size(1 << 26) # 64MB
|
||||
"""
|
||||
return self._writer.set_page_size(page_size)
|
||||
|
||||
|
|
Loading…
Reference in New Issue