[feat][assistant][I3T96F] add new data operator SpeechCommands

This commit is contained in:
vddong 2021-11-27 11:25:18 +08:00
parent 3269c9b881
commit 91d3a7543f
23 changed files with 1346 additions and 5 deletions

View File

@ -111,6 +111,7 @@
#include "minddata/dataset/engine/ir/datasetops/source/qmnist_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/sbu_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/usps_node.h"
@ -1526,6 +1527,29 @@ SBUDataset::SBUDataset(const std::vector<char> &dataset_dir, bool decode, const
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
SpeechCommandsDataset::SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::shared_ptr<Sampler> &sampler,
const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<SpeechCommandsNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
SpeechCommandsDataset::SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<SpeechCommandsNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
SpeechCommandsDataset::SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler.get().Parse();
auto ds = std::make_shared<SpeechCommandsNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
TFRecordDataset::TFRecordDataset(const std::vector<std::vector<char>> &dataset_files, const std::vector<char> &schema,
const std::vector<std::vector<char>> &columns_list, int64_t num_samples,
ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows,

View File

@ -43,6 +43,7 @@
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/yes_no_node.h"
@ -388,6 +389,17 @@ PYBIND_REGISTER(SBUNode, 2, ([](const py::module *m) {
}));
}));
PYBIND_REGISTER(SpeechCommandsNode, 2, ([](const py::module *m) {
(void)py::class_<SpeechCommandsNode, DatasetNode, std::shared_ptr<SpeechCommandsNode>>(
*m, "SpeechCommandsNode", "to create a SpeechCommandsNode")
.def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) {
auto speech_commands =
std::make_shared<SpeechCommandsNode>(dataset_dir, usage, toSamplerObj(sampler), nullptr);
THROW_IF_ERROR(speech_commands->ValidateParams());
return speech_commands;
}));
}));
PYBIND_REGISTER(TextFileNode, 2, ([](const py::module *m) {
(void)py::class_<TextFileNode, DatasetNode, std::shared_ptr<TextFileNode>>(*m, "TextFileNode",
"to create a TextFileNode")

View File

@ -862,7 +862,6 @@ Status ReadWaveFile(const std::string &wav_file_dir, std::vector<float> *wavefor
}
const float kMaxVal = 32767.0;
const int kDataMove = 2;
Path file_path(wav_realpath.value());
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find metadata file:" + file_path.ToString());
@ -872,8 +871,6 @@ Status ReadWaveFile(const std::string &wav_file_dir, std::vector<float> *wavefor
WavHeader *header = new WavHeader();
in.read(reinterpret_cast<char *>(header), sizeof(WavHeader));
*sample_rate = header->sampleRate;
std::unique_ptr<char[]> data = std::make_unique<char[]>(header->subChunk2Size);
in.read(data.get(), header->subChunk2Size);
float bytesPerSample = header->bitsPerSample / 8;
if (bytesPerSample == 0) {
in.close();
@ -881,9 +878,11 @@ Status ReadWaveFile(const std::string &wav_file_dir, std::vector<float> *wavefor
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "ReadWaveFile: divide zero error.");
}
int numSamples = header->subChunk2Size / bytesPerSample;
std::unique_ptr<int16_t[]> data = std::make_unique<int16_t[]>(numSamples);
in.read(reinterpret_cast<char *>(data.get()), sizeof(int16_t) * numSamples);
waveform_vec->resize(numSamples);
for (int i = 0; i < numSamples; i++) {
(*waveform_vec)[i] = static_cast<int16_t>(data[kDataMove * i] / kMaxVal);
(*waveform_vec)[i] = data[i] / kMaxVal;
}
in.close();
delete header;
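As a rough worked example of the new sample handling (the numbers below are illustrative, not taken from the commit): for a hypothetical one-second, 16 kHz, mono clip with 16-bit samples, the header fields and the conversion work out as follows.

// Illustrative values for a hypothetical 1-second, 16 kHz, 16-bit mono WAV file.
uint32_t subChunk2Size = 32000;                    // bytes of PCM payload
uint16_t bitsPerSample = 16;
float bytesPerSample = bitsPerSample / 8;          // 2 bytes per sample
int numSamples = subChunk2Size / bytesPerSample;   // 16000 samples
// Each int16_t sample is then divided by kMaxVal (32767.0),
// so the resulting float waveform lies roughly in [-1.0, 1.0].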

View File

@ -28,6 +28,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
qmnist_op.cc
random_data_op.cc
sbu_op.cc
speech_commands_op.cc
text_file_op.cc
usps_op.cc
yes_no_op.cc

View File

@ -0,0 +1,205 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/datasetops/source/speech_commands_op.h"
#include <fstream>
#include <iomanip>
#include <regex>
#include "minddata/dataset/audio/kernels/audio_utils.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "utils/file_utils.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace dataset {
constexpr char kTestFiles[] = "testing_list.txt";
constexpr char kValFiles[] = "validation_list.txt";
constexpr char kExtension[] = ".wav";
#ifndef _WIN32
constexpr char kSplitSymbol[] = "/";
#else
constexpr char kSplitSymbol[] = "\\";
#endif
SpeechCommandsOp::SpeechCommandsOp(const std::string &dataset_dir, const std::string &usage, int32_t num_workers,
int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
dataset_dir_(dataset_dir),
usage_(usage),
data_schema_(std::move(data_schema)) {}
Status SpeechCommandsOp::PrepareData() {
// Get file lists according to usage.
// When usage == "train", collect all file names first, then subtract the files listed for "test" and "valid".
std::set<std::string> selected_files;
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
}
std::string real_path = real_dataset_dir.value();
if (usage_ == "all") {
RETURN_IF_NOT_OK(WalkAllFiles(real_path));
selected_files = all_wave_files;
} else if (usage_ == "test" || usage_ == "valid") {
RETURN_IF_NOT_OK(ParseFileList(real_path, usage_));
selected_files = loaded_names;
} else {
RETURN_IF_NOT_OK(WalkAllFiles(real_path));
RETURN_IF_NOT_OK(ParseFileList(real_path, "test"));
RETURN_IF_NOT_OK(ParseFileList(real_path, "valid"));
set_difference(all_wave_files.begin(), all_wave_files.end(), loaded_names.begin(), loaded_names.end(),
inserter(selected_files, selected_files.begin()));
}
selected_files_vec.assign(selected_files.begin(), selected_files.end());
num_rows_ = selected_files_vec.size();
return Status::OK();
}
Status SpeechCommandsOp::ParseFileList(const std::string &pf_path, const std::string &pf_usage) {
std::string line;
std::string file_list = (pf_usage == "test" ? kTestFiles : kValFiles);
Path path(pf_path);
std::string list_path = (Path(pf_path) / Path(file_list)).ToString();
std::ifstream file_reader(list_path);
while (getline(file_reader, line)) {
Path file_path(path / line);
loaded_names.insert(file_path.ToString());
}
file_reader.close();
return Status::OK();
}
Status SpeechCommandsOp::WalkAllFiles(const std::string &walk_path) {
Path dir(walk_path);
if (dir.IsDirectory() == false) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, no folder found in: " + walk_path);
}
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
RETURN_UNEXPECTED_IF_NULL(dir_itr);
std::vector<std::string> folder_names;
while (dir_itr->HasNext()) {
Path sub_dir = dir_itr->Next();
if (sub_dir.IsDirectory() && (sub_dir.ToString().find("_background_noise_") == std::string::npos)) {
folder_names.emplace_back(sub_dir.ToString());
}
}
CHECK_FAIL_RETURN_UNEXPECTED(!folder_names.empty(), "Invalid file, failed to open directory: " + dir.ToString());
for (int i = 0; i < folder_names.size(); i++) {
Path folder_path(folder_names[i]);
if (folder_path.IsDirectory()) {
auto folder_it = Path::DirIterator::OpenDirectory(&folder_path);
while (folder_it->HasNext()) {
Path file = folder_it->Next();
if (file.Extension() == kExtension) {
all_wave_files.insert(file.ToString());
}
}
} else {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + folder_path.ToString());
}
}
CHECK_FAIL_RETURN_UNEXPECTED(!all_wave_files.empty(), "Invalid file, no .wav files found under " + dataset_dir_);
return Status::OK();
}
Status SpeechCommandsOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::string file_name = selected_files_vec[row_id];
std::shared_ptr<Tensor> waveform, sample_rate_scalar, label_scalar, speaker_id_scalar, utterance_number_scalar;
std::string label, speaker_id;
int32_t utterance_number, sample_rate;
std::vector<float> waveform_vec;
RETURN_IF_NOT_OK(ReadWaveFile(file_name, &waveform_vec, &sample_rate));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(waveform_vec, &waveform));
RETURN_IF_NOT_OK(waveform->ExpandDim(0));
RETURN_IF_NOT_OK(GetFileInfo(file_name, &label, &speaker_id, &utterance_number));
RETURN_IF_NOT_OK(Tensor::CreateScalar(sample_rate, &sample_rate_scalar));
RETURN_IF_NOT_OK(Tensor::CreateScalar(label, &label_scalar));
RETURN_IF_NOT_OK(Tensor::CreateScalar(speaker_id, &speaker_id_scalar));
RETURN_IF_NOT_OK(Tensor::CreateScalar(utterance_number, &utterance_number_scalar));
(*trow) = TensorRow(row_id, {waveform, sample_rate_scalar, label_scalar, speaker_id_scalar, utterance_number_scalar});
trow->setPath({file_name, file_name, file_name, file_name, file_name});
return Status::OK();
}
void SpeechCommandsOp::Print(std::ostream &out, bool show_all) const {
if (!show_all) {
// Call the super class for displaying and common 1-liner info
ParallelOp::Print(out, show_all);
// Then show and custom derived-internal 1-liner info for this op
out << "\n";
} else {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nNumber of rows: " << num_rows_ << "\nSpeechCommands directory: " << dataset_dir_ << "\n\n";
}
}
Status SpeechCommandsOp::ComputeColMap() {
if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->Column(i).Name()] = i;
}
} else {
MS_LOG(WARNING) << "Column name map is already set!";
}
return Status::OK();
}
Status SpeechCommandsOp::GetFileInfo(const std::string &file_path, std::string *label, std::string *speaker_id,
int32_t *utterance_number) {
// Using regex to get wave infos from filename.
RETURN_UNEXPECTED_IF_NULL(label);
RETURN_UNEXPECTED_IF_NULL(speaker_id);
RETURN_UNEXPECTED_IF_NULL(utterance_number);
int32_t split_index = 0;
split_index = file_path.find_last_of(kSplitSymbol);
std::string label_string = file_path.substr(0, split_index);
*label = label_string.substr(label_string.find_last_of(kSplitSymbol) + 1);  // plus 1 to skip the split symbol itself.
std::string filename = file_path.substr(split_index + 1);
std::regex pattern = std::regex("(.*)_nohash_(\\d+)\\.wav");
std::smatch result;
regex_match(filename, result, pattern);
CHECK_FAIL_RETURN_UNEXPECTED(!(result[0] == "" || result[1] == ""),
"Invalid file name, failed to get file info: " + filename);
*speaker_id = result[1];
std::string utt_id = result[2];
*utterance_number = atoi(utt_id.c_str());
return Status::OK();
}
Status SpeechCommandsOp::CountTotalRows(int64_t *num_rows) {
RETURN_UNEXPECTED_IF_NULL(num_rows);
if (all_wave_files.size() == 0) {
auto real_path = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_path.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
}
RETURN_IF_NOT_OK(WalkAllFiles(real_path.value()));
}
(*num_rows) = static_cast<int64_t>(all_wave_files.size());
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
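For reference, a standalone sketch of the filename parsing that GetFileInfo performs; the path below is hypothetical, but it follows the <label>/<speaker_id>_nohash_<utterance_number>.wav layout shown in the dataset docstring.

#include <iostream>
#include <regex>
#include <string>

int main() {
  // Hypothetical wave file path from a SpeechCommands-style directory tree.
  std::string file_path = "/data/speech_commands/cat/b433eff_nohash_0.wav";
  std::size_t split_index = file_path.find_last_of('/');
  std::string label_dir = file_path.substr(0, split_index);
  std::string label = label_dir.substr(label_dir.find_last_of('/') + 1);  // "cat"
  std::string filename = file_path.substr(split_index + 1);               // "b433eff_nohash_0.wav"
  std::regex pattern("(.*)_nohash_(\\d+)\\.wav");
  std::smatch result;
  if (std::regex_match(filename, result, pattern)) {
    std::cout << "label: " << label << ", speaker_id: " << result[1]
              << ", utterance_number: " << result[2] << std::endl;  // cat, b433eff, 0
  }
  return 0;
}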

View File

@ -0,0 +1,111 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/parallel_op.h"
#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/wait_post.h"
namespace mindspore {
namespace dataset {
class SpeechCommandsOp : public MappableLeafOp {
public:
/// Constructor.
/// \param[in] std::string - dataset_dir - directory of SpeechCommands dataset.
/// \param[in] std::string - usage - usage of SpeechCommands dataset, can be "train", "test", "valid" or "all".
/// \param[in] int32_t - num_workers - number of workers reading audio files in parallel.
/// \param[in] int32_t - queue_size - connector queue size.
/// \param[in] std::unique_ptr<DataSchema> - data_schema - data schema of SpeechCommands dataset.
/// \param[in] std::shared_ptr<SamplerRT> - sampler - sampler tells SpeechCommandsOp what to read.
SpeechCommandsOp(const std::string &dataset_dir, const std::string &usage, int32_t num_workers, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
/// Destructor.
~SpeechCommandsOp() override = default;
/// A print method typically used for debugging.
/// \param[out] out - out stream.
/// \param[in] show_all - whether to show all information.
void Print(std::ostream &out, bool show_all) const override;
/// Function to count the number of samples in the SpeechCommands dataset.
/// \param[out] num_rows - output arg that will hold the actual dataset size.
/// \return Status - The status code returned.
Status CountTotalRows(int64_t *num_rows);
/// Op name getter.
/// \return Name of the current Op.
std::string Name() const override { return "SpeechCommandsOp"; }
private:
/// Load a tensor row.
/// \param[in] row_id - row id.
/// \param[out] trow - waveform & sample_rate & label & speaker_id & utterance_number
/// read into this tensor row.
/// \return Status - The status code returned.
Status LoadTensorRow(row_id_type row_id, TensorRow *trow) override;
/// Parse "testing_list.txt" or "validation_list.txt" according to usage.
/// \param[in] pf_path - the real path of root directory.
/// \param[in] pf_usage - usage, either "test" or "valid".
/// \return Status - The status code returned.
Status ParseFileList(const std::string &pf_path, const std::string &pf_usage);
/// Collect the list of wave files to load according to usage; called before any rows are fetched.
/// \return Status - The status code returned.
Status PrepareData();
/// Walk all folders to read all ".wav" files.
/// \param[in] walk_path - real path to traverse.
/// \return Status - The status code returned.
Status WalkAllFiles(const std::string &walk_path);
/// Get detail info of wave filename by regex.
/// \param[in] file_path - wave file path.
/// \param[out] label - label.
/// \param[out] speaker_id - speaker id.
/// \param[out] utterance_number - utterance number.
/// \return Status - The status code returned.
Status GetFileInfo(const std::string &file_path, std::string *label, std::string *speaker_id,
int32_t *utterance_number);
/// Private function for computing the assignment of the column name map.
/// \return Status - The status code returned.
Status ComputeColMap() override;
std::string dataset_dir_;
std::string usage_; // can only be "test", "train", "valid" or "all".
std::unique_ptr<DataSchema> data_schema_;
std::set<std::string> all_wave_files; // all wave files in dataset_dir.
std::set<std::string> loaded_names; // loaded file names from txt files.
std::vector<std::string> selected_files_vec; // vector of filenames for sequential loading.
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SPEECH_COMMANDS_OP_H_

View File

@ -100,6 +100,7 @@ constexpr char kPlaces365Node[] = "Places365Dataset";
constexpr char kQMnistNode[] = "QMnistDataset";
constexpr char kRandomNode[] = "RandomDataset";
constexpr char kSBUNode[] = "SBUDataset";
constexpr char kSpeechCommandsNode[] = "SpeechCommandsDataset";
constexpr char kTextFileNode[] = "TextFileDataset";
constexpr char kTFRecordNode[] = "TFRecordDataset";
constexpr char kUSPSNode[] = "USPSDataset";

View File

@ -28,6 +28,7 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES
qmnist_node.cc
random_node.cc
sbu_node.cc
speech_commands_node.cc
text_file_node.cc
tf_record_node.cc
usps_node.cc

View File

@ -0,0 +1,117 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h"
#include <utility>
#include "minddata/dataset/engine/datasetops/source/speech_commands_op.h"
namespace mindspore {
namespace dataset {
SpeechCommandsNode::SpeechCommandsNode(const std::string &dataset_dir, const std::string &usage,
std::shared_ptr<SamplerObj> sampler, std::shared_ptr<DatasetCache> cache)
: MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {}
std::shared_ptr<DatasetNode> SpeechCommandsNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
auto node = std::make_shared<SpeechCommandsNode>(dataset_dir_, usage_, sampler, cache_);
return node;
}
void SpeechCommandsNode::Print(std::ostream &out) const {
out << (Name() + "(cache: " + ((cache_ != nullptr) ? "true" : "false") + ")");
}
Status SpeechCommandsNode::ValidateParams() {
RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
RETURN_IF_NOT_OK(ValidateDatasetDirParam("SpeechCommandsNode", dataset_dir_));
RETURN_IF_NOT_OK(ValidateDatasetSampler("SpeechCommandsNode", sampler_));
RETURN_IF_NOT_OK(ValidateStringValue("SpeechCommandsNode", usage_, {"train", "valid", "test", "all"}));
return Status::OK();
}
Status SpeechCommandsNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
auto schema = std::make_unique<DataSchema>();
RETURN_IF_NOT_OK(
schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1)));
TensorShape sample_rate_scalar = TensorShape::CreateScalar();
TensorShape label_scalar = TensorShape::CreateScalar();
TensorShape speaker_id_scalar = TensorShape::CreateScalar();
TensorShape utterance_number_scalar = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(schema->AddColumn(
ColDescriptor("sample_rate", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &sample_rate_scalar)));
RETURN_IF_NOT_OK(
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &label_scalar)));
RETURN_IF_NOT_OK(schema->AddColumn(
ColDescriptor("speaker_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &speaker_id_scalar)));
RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("utterance_number", DataType(DataType::DE_INT32),
TensorImpl::kFlexible, 0, &utterance_number_scalar)));
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto speech_commands_op = std::make_shared<SpeechCommandsOp>(dataset_dir_, usage_, num_workers_, connector_que_size_,
std::move(schema), std::move(sampler_rt));
speech_commands_op->SetTotalRepeats(GetTotalRepeats());
speech_commands_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(speech_commands_op);
return Status::OK();
}
Status SpeechCommandsNode::GetShardId(int32_t *shard_id) {
*shard_id = sampler_->ShardId();
return Status::OK();
}
Status SpeechCommandsNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) {
if (dataset_size_ > 0) {
*dataset_size = dataset_size_;
return Status::OK();
}
int64_t sample_size, num_rows;
std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(Build(&ops));
CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build SpeechCommandsOp.");
auto op = std::dynamic_pointer_cast<SpeechCommandsOp>(ops.front());
RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows));
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
sample_size = sampler_rt->CalculateNumSamples(num_rows);
if (sample_size == -1) {
RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
}
*dataset_size = sample_size;
dataset_size_ = *dataset_size;
return Status::OK();
}
Status SpeechCommandsNode::to_json(nlohmann::json *out_json) {
nlohmann::json args, sampler_args;
RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
args["sampler"] = sampler_args;
args["usage"] = usage_;
args["num_parallel_workers"] = num_workers_;
args["dataset_dir"] = dataset_dir_;
if (cache_ != nullptr) {
nlohmann::json cache_args;
RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
args["cache"] = cache_args;
}
*out_json = args;
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,97 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SPEECH_COMMANDS_NODE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SPEECH_COMMANDS_NODE_H_
#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
namespace mindspore {
namespace dataset {
class SpeechCommandsNode : public MappableSourceNode {
public:
/// \brief Constructor.
SpeechCommandsNode(const std::string &dataset_dir, const std::string &usage, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache);
/// \brief Destructor.
~SpeechCommandsNode() = default;
/// \brief Node name getter.
/// \return Name of the current node.
std::string Name() const override { return kSpeechCommandsNode; }
/// \brief Print the description.
/// \param[out] out - The output stream to write output to.
void Print(std::ostream &out) const override;
/// \brief Copy the node to a new object.
/// \return A shared pointer to the new copy.
std::shared_ptr<DatasetNode> Copy() override;
/// \brief a base class override function to create the required runtime dataset op objects for this class.
/// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create.
/// \return Status Status::OK() if build successfully.
Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
/// \brief Parameters validation.
/// \return Status Status::OK() if all the parameters are valid.
Status ValidateParams() override;
/// \brief Get the shard id of node.
/// \param[in] shard_id The shard id.
/// \return Status Status::OK() if get shard id successfully.
Status GetShardId(int32_t *shard_id) override;
/// \brief Base-class override for GetDatasetSize.
/// \param[in] size_getter Shared pointer to DatasetSizeGetter.
/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
/// dataset size at the expense of accuracy.
/// \param[out] dataset_size the size of the dataset.
/// \return Status of the function.
Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) override;
/// \brief Getter functions.
const std::string &DatasetDir() const { return dataset_dir_; }
/// \brief Getter functions.
const std::string &Usage() const { return usage_; }
/// \brief Get the arguments of node.
/// \param[out] out_json JSON string of all attributes.
/// \return Status of the function.
Status to_json(nlohmann::json *out_json) override;
/// \brief Sampler getter.
/// \return SamplerObj of the current node.
std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
/// \brief Sampler setter.
/// \param[in] sampler - Specify sampler.
void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
private:
std::string dataset_dir_;
std::string usage_;
std::shared_ptr<SamplerObj> sampler_;
}; // class SpeechCommandsNode
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SPEECH_COMMANDS_NODE_H_

View File

@ -3533,6 +3533,79 @@ inline std::shared_ptr<SBUDataset> SBU(const std::string &dataset_dir, bool deco
return std::make_shared<SBUDataset>(StringToChar(dataset_dir), decode, sampler, cache);
}
/// \class SpeechCommandsDataset.
/// \brief A source dataset that reads and parses SpeechCommands dataset.
class SpeechCommandsDataset : public Dataset {
public:
/// \brief Constructor of SpeechCommandsDataset.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all".
/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use.
SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
/// \brief Constructor of SpeechCommandsDataset.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all".
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use.
SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
const std::shared_ptr<DatasetCache> &cache);
/// \brief Constructor of SpeechCommandsDataset.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all".
/// \param[in] sampler Sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use.
SpeechCommandsDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
/// \brief Destructor of SpeechCommandsDataset.
~SpeechCommandsDataset() = default;
};
/// \brief Function to create a SpeechCommands Dataset.
/// \note The generated dataset has five columns ["waveform", "sample_rate", "label", "speaker_id", "utterance_number"].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all" (default = "all").
/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
/// \return Shared pointer to the SpeechCommandsDataset.
inline std::shared_ptr<SpeechCommandsDataset> SpeechCommands(
const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<SpeechCommandsDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
}
/// \brief Function to create a SpeechCommands Dataset.
/// \note The generated dataset has five columns ["waveform", "sample_rate", "label", "speaker_id", "utterance_number"].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all".
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
/// \return Shared pointer to the SpeechCommandsDataset.
inline std::shared_ptr<SpeechCommandsDataset> SpeechCommands(const std::string &dataset_dir, const std::string &usage,
const Sampler *sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<SpeechCommandsDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
}
/// \brief Function to create a SpeechCommands Dataset.
/// \note The generated dataset has five columns ["waveform", "sample_rate", "label", "speaker_id", "utterance_number"].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] usage Usage of SpeechCommands, can be "train", "test", "valid" or "all".
/// \param[in] sampler Sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
/// \return Shared pointer to the SpeechCommandsDataset.
inline std::shared_ptr<SpeechCommandsDataset> SpeechCommands(const std::string &dataset_dir, const std::string &usage,
const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<SpeechCommandsDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
}
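A minimal usage sketch of the shared-pointer overload above (the directory path is a placeholder; the iteration mirrors the pipeline tests added later in this commit):

std::string folder_path = "/path/to/speech_commands_dataset_directory";  // placeholder
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "train", std::make_shared<RandomSampler>(false, 2));
std::shared_ptr<Iterator> iter = ds->CreateIterator();
std::unordered_map<std::string, mindspore::MSTensor> row;
iter->GetNextRow(&row);
auto waveform = row["waveform"];   // float32 tensor of shape (1, num_samples)
auto label = row["label"];         // string scalar holding the folder name, e.g. "cat"
iter->Stop();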
/// \class TextFileDataset
/// \brief A source dataset that reads and parses datasets stored on disk in text format.
class TextFileDataset : public Dataset {

View File

@ -53,6 +53,7 @@ class Sampler : std::enable_shared_from_this<Sampler> {
friend class QMnistDataset;
friend class RandomDataDataset;
friend class SBUDataset;
friend class SpeechCommandsDataset;
friend class TextFileDataset;
friend class TFRecordDataset;
friend class USPSDataset;

View File

@ -69,7 +69,7 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
check_sb_dataset, check_flowers102dataset, check_cityscapes_dataset, check_usps_dataset, check_div2k_dataset, \
check_sbu_dataset, check_qmnist_dataset, check_emnist_dataset, check_fake_image_dataset, check_places365_dataset, \
check_photo_tour_dataset, check_ag_news_dataset, check_dbpedia_dataset, check_lj_speech_dataset, \
check_yes_no_dataset
check_yes_no_dataset, check_speech_commands_dataset
from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
get_prefetch_size, get_auto_offload
from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist
@ -7922,6 +7922,130 @@ class _SBDataset:
return self._get_item(idx)
class SpeechCommandsDataset(MappableDataset):
"""
A source dataset for reading and parsing the SpeechCommands dataset.
The generated dataset has five columns :py:obj:`[waveform, sample_rate, label, speaker_id, utterance_number]`.
The tensor of column :py:obj:`waveform` is a vector of the float32 type.
The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type.
The tensor of column :py:obj:`label` is a scalar of the string type.
The tensor of column :py:obj:`speaker_id` is a scalar of the string type.
The tensor of column :py:obj:`utterance_number` is a scalar of the int32 type.
Args:
dataset_dir (str): Path to the root directory that contains the dataset.
usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train`
will read from 84,843 samples, `test` will read from 11,005 samples, `valid` will read from 9,981
validation samples and `all` will read from all 105,829 samples (default=None, will read all samples).
num_samples (int, optional): The number of samples to be included in the dataset
(default=None, will read all samples).
num_parallel_workers (int, optional): Number of workers to read the data
(default=None, will use value set in the config).
shuffle (bool, optional): Whether or not to perform shuffle on the dataset
(default=None, expected order behavior shown in the table).
sampler (Sampler, optional): Object used to choose samples from the dataset
(default=None, expected order behavior shown in the table).
num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified
when `num_shards` is also specified.
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
(default=None, which means no cache is used).
Raises:
RuntimeError: If dataset_dir does not contain data files.
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
RuntimeError: If sampler and shuffle are specified at the same time.
RuntimeError: If sampler and sharding are specified at the same time.
RuntimeError: If num_shards is specified but shard_id is None.
RuntimeError: If shard_id is specified but num_shards is None.
ValueError: If shard_id is invalid (< 0 or >= num_shards).
Note:
- This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
The table below shows what input arguments are allowed and their expected behavior.
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
:widths: 25 25 50
:header-rows: 1
* - Parameter `sampler`
- Parameter `shuffle`
- Expected Order Behavior
* - None
- None
- random order
* - None
- True
- random order
* - None
- False
- sequential order
* - Sampler object
- None
- order defined by sampler
* - Sampler object
- True
- not allowed
* - Sampler object
- False
- not allowed
Examples:
>>> speech_commands_dataset_dir = "/path/to/speech_commands_dataset_directory"
>>>
>>> # Read 3 samples from SpeechCommands dataset
>>> dataset = ds.SpeechCommandsDataset(dataset_dir=speech_commands_dataset_dir, num_samples=3)
>>>
>>> # Note: In SpeechCommands dataset, each dictionary has keys "waveform", "sample_rate", "label",
>>> # "speaker_id" and "utterance_number".
About SpeechCommands dataset:
SpeechCommands is a database for limited-vocabulary speech recognition, containing 105,829 audio samples in
'.wav' format.
Here is the original SpeechCommands dataset structure.
You can unzip the dataset files into this directory structure and read them with MindSpore's API.
.. code-block::
.
└── speech_commands_dataset_dir
    ├── cat
    │    ├── b433eff_nohash_0.wav
    │    ├── 5a33edf_nohash_1.wav
    │    └── ....
    ├── dog
    │    ├── b433w2w_nohash_0.wav
    │    └── ....
    ├── four
    └── ....
Citation:
.. code-block::
@article{2018Speech,
title={Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition},
author={Warden, P.},
year={2018}
}
"""
@check_speech_commands_dataset
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
sampler=None, num_shards=None, shard_id=None, cache=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
self.usage = replace_none(usage, "all")
def parse(self, children=None):
return cde.SpeechCommandsNode(self.dataset_dir, self.usage, self.sampler)
class DeserializedDataset(Dataset):
def __init__(self, input_obj):
super().__init__()

View File

@ -1637,6 +1637,36 @@ def check_sb_dataset(method):
return new_method
def check_speech_commands_dataset(method):
"""A wrapper that wraps a parameter checker around the original Dataset(SpeechCommandsDataset)."""
@wraps(method)
def new_method(self, *args, **kwargs):
_, param_dict = parse_user_args(method, *args, **kwargs)
nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id']
nreq_param_bool = ['shuffle']
dataset_dir = param_dict.get('dataset_dir')
check_dir(dataset_dir)
usage = param_dict.get('usage')
if usage is not None:
check_valid_str(usage, ["train", "test", "valid", "all"], "usage")
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_bool, param_dict, bool)
check_sampler_shuffle_shard_options(param_dict)
cache = param_dict.get('cache')
check_cache_option(cache)
return method(self, *args, **kwargs)
return new_method
def check_cityscapes_dataset(method):
"""A wrapper that wraps a parameter checker around the original CityScapesDataset."""

View File

@ -39,6 +39,7 @@ SET(DE_UT_SRCS
c_api_dataset_randomdata_test.cc
c_api_dataset_save.cc
c_api_dataset_sbu_test.cc
c_api_dataset_speech_commands_test.cc
c_api_dataset_textfile_test.cc
c_api_dataset_tfrecord_test.cc
c_api_dataset_usps_test.cc

View File

@ -0,0 +1,343 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/include/dataset/datasets.h"
using namespace mindspore::dataset;
using mindspore::dataset::DataType;
using mindspore::dataset::Tensor;
using mindspore::dataset::TensorShape;
class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
/// Feature: Test SpeechCommands dataset.
/// Description: read data from the dataset directory.
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsDataset) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsDataset.";
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "all", std::make_shared<RandomSampler>(false, 2));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
MS_LOG(INFO) << "iter->GetNextRow(&row) OK";
EXPECT_NE(row.find("waveform"), row.end());
EXPECT_NE(row.find("sample_rate"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("speaker_id"), row.end());
EXPECT_NE(row.find("utterance_number"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto waveform = row["waveform"];
MS_LOG(INFO) << "Tensor waveform shape: " << waveform.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 2);
// Manually terminate the pipeline.
iter->Stop();
}
/// Feature: Test SpeechCommands dataset.
/// Description: test SpeechCommands dataset in pipeline.
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetWithPipeline) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsDatasetWithPipeline.";
// Create two SpeechCommands Dataset.
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds1 = SpeechCommands(folder_path, "all", std::make_shared<RandomSampler>(false, 1));
std::shared_ptr<Dataset> ds2 = SpeechCommands(folder_path, "all", std::make_shared<RandomSampler>(false, 2));
EXPECT_NE(ds1, nullptr);
EXPECT_NE(ds2, nullptr);
// Create two Repeat operation on ds.
int32_t repeat_num = 2;
ds1 = ds1->Repeat(repeat_num);
EXPECT_NE(ds1, nullptr);
repeat_num = 3;
ds2 = ds2->Repeat(repeat_num);
EXPECT_NE(ds2, nullptr);
// Create two Project operation on ds.
std::vector<std::string> column_project = {"waveform", "sample_rate", "label", "speaker_id", "utterance_number"};
ds1 = ds1->Project(column_project);
EXPECT_NE(ds1, nullptr);
ds2 = ds2->Project(column_project);
EXPECT_NE(ds2, nullptr);
// Create a Concat operation on the ds.
ds1 = ds1->Concat({ds2});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds1->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
EXPECT_NE(row.find("waveform"), row.end());
EXPECT_NE(row.find("sample_rate"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("speaker_id"), row.end());
EXPECT_NE(row.find("utterance_number"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto waveform = row["waveform"];
MS_LOG(INFO) << "Tensor waveform shape: " << waveform.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 8);
// Manually terminate the pipeline.
iter->Stop();
}
/// Feature: Test SpeechCommands dataset.
/// Description: get the size of SpeechCommands dataset.
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsGetDatasetSize.";
// Create a SpeechCommands Dataset.
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "all");
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 3);
}
/// Feature: Test SpeechCommands dataset.
/// Description: getter functions.
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsGetters.";
// Create a SpeechCommands Dataset.
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path);
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 3);
std::vector<DataType> types = ToDETypes(ds->GetOutputTypes());
std::vector<TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
std::vector<std::string> column_names = {"waveform", "sample_rate", "label", "speaker_id", "utterance_number"};
EXPECT_EQ(types.size(), 5);
EXPECT_EQ(types[0].ToString(), "float32");
EXPECT_EQ(types[1].ToString(), "int32");
EXPECT_EQ(types[2].ToString(), "string");
EXPECT_EQ(types[3].ToString(), "string");
EXPECT_EQ(types[4].ToString(), "int32");
EXPECT_EQ(shapes.size(), 5);
EXPECT_EQ(shapes[1].ToString(), "<>");
EXPECT_EQ(shapes[2].ToString(), "<>");
EXPECT_EQ(shapes[3].ToString(), "<>");
EXPECT_EQ(shapes[4].ToString(), "<>");
EXPECT_EQ(ds->GetBatchSize(), 1);
EXPECT_EQ(ds->GetRepeatCount(), 1);
EXPECT_EQ(ds->GetDatasetSize(), 3);
EXPECT_EQ(ToDETypes(ds->GetOutputTypes()), types);
EXPECT_EQ(ToTensorShapeVec(ds->GetOutputShapes()), shapes);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
/// Feature: Test SpeechCommands dataset.
/// Description: test usage "train".
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsUsageTrain) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsUsageTrain.";
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "train", std::make_shared<RandomSampler>(false, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
MS_LOG(INFO) << "iter->GetNextRow(&row) OK";
EXPECT_NE(row.find("waveform"), row.end());
EXPECT_NE(row.find("sample_rate"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("speaker_id"), row.end());
EXPECT_NE(row.find("utterance_number"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto waveform = row["waveform"];
MS_LOG(INFO) << "Tensor waveform shape: " << waveform.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline.
iter->Stop();
}
/// Feature: Test SpeechCommands dataset.
/// Description: test usage "test".
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsUsageTest) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsUsageTest.";
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "test", std::make_shared<RandomSampler>(false, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
MS_LOG(INFO) << "iter->GetNextRow(&row) OK";
EXPECT_NE(row.find("waveform"), row.end());
EXPECT_NE(row.find("sample_rate"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("speaker_id"), row.end());
EXPECT_NE(row.find("utterance_number"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto waveform = row["waveform"];
MS_LOG(INFO) << "Tensor waveform shape: " << waveform.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline.
iter->Stop();
}
/// Feature: Test SpeechCommands dataset.
/// Description: test usage "valid".
/// Expectation: the data is processed successfully.
TEST_F(MindDataTestPipeline, TestSpeechCommandsUsageValid) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsUsageValid.";
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "valid", std::make_shared<RandomSampler>(false, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
MS_LOG(INFO) << "iter->GetNextRow(&row) OK";
EXPECT_NE(row.find("waveform"), row.end());
EXPECT_NE(row.find("sample_rate"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("speaker_id"), row.end());
EXPECT_NE(row.find("utterance_number"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto waveform = row["waveform"];
MS_LOG(INFO) << "Tensor waveform shape: " << waveform.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline.
iter->Stop();
}
/// Feature: Test SpeechCommands dataset.
/// Description: error tests.
/// Expectation: throw error messages when certain errors occur.
TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsDatasetFail.";
// Create a SpeechCommands Dataset.
std::shared_ptr<Dataset> ds = SpeechCommands("", "all", std::make_shared<RandomSampler>(false, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid SpeechCommands input.
EXPECT_EQ(iter, nullptr);
}
/// Feature: Test SpeechCommands dataset.
/// Description: test error usages.
/// Expectation: throw error messages when certain errors occur.
TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetWithInvalidUsageFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsDatasetWithInvalidUsageFail.";
// Create a SpeechCommands Dataset.
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "eval", std::make_shared<RandomSampler>(false, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid SpeechCommands input.
EXPECT_EQ(iter, nullptr);
}
/// Feature: Test SpeechCommands dataset.
/// Description: test null sample error.
/// Expectation: throw error messages when certain errors occur.
TEST_F(MindDataTestPipeline, TestSpeechCommandsDatasetWithNullSamplerFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpeechCommandsDatasetWithNullSamplerFail.";
// Create a SpeechCommands Dataset.
std::string folder_path = datasets_root_path_ + "/testSpeechCommandsData/";
std::shared_ptr<Dataset> ds = SpeechCommands(folder_path, "all", nullptr);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid SpeechCommands input, sampler cannot be nullptr.
EXPECT_EQ(iter, nullptr);
}

View File

@ -0,0 +1 @@
backpack/eeee2222_nohash_3.wav

View File

@ -0,0 +1 @@
bob/qwer2222_nohash_2.wav

View File

@ -0,0 +1,199 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Test SpeechCommands dataset operators
"""
import pytest
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.audio.transforms as audio
from mindspore import log as logger
DATA_DIR = "../data/dataset/testSpeechCommandsData/"
def test_speech_commands_basic():
"""
Feature: SpeechCommands Dataset
Description: Read all files
Expectation: Output the correct number of files
"""
logger.info("Test SpeechCommandsDataset Op.")
# case 1: test loading whole dataset
data1 = ds.SpeechCommandsDataset(DATA_DIR)
num_iter1 = 0
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter1 += 1
assert num_iter1 == 3
# case 2: test num_samples
data2 = ds.SpeechCommandsDataset(DATA_DIR, num_samples=3)
num_iter2 = 0
for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter2 += 1
assert num_iter2 == 3
# case 3: test repeat
data3 = ds.SpeechCommandsDataset(DATA_DIR, num_samples=2)
data3 = data3.repeat(5)
num_iter3 = 0
for _ in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter3 += 1
assert num_iter3 == 10
def test_speech_commands_sequential_sampler():
"""
Feature: SpeechCommands Dataset
Description: Use SequentialSampler to sample data.
Expectation: The number of samples returned by the dict iterator equals the requested number of samples.
"""
logger.info("Test SpeechCommandsDataset with SequentialSampler.")
num_samples = 2
sampler = ds.SequentialSampler(num_samples=num_samples)
data1 = ds.SpeechCommandsDataset(DATA_DIR, sampler=sampler)
data2 = ds.SpeechCommandsDataset(DATA_DIR, shuffle=False, num_samples=num_samples)
sample_rate_list1, sample_rate_list2 = [], []
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
sample_rate_list1.append(item1["sample_rate"])
sample_rate_list2.append(item2["sample_rate"])
num_iter += 1
np.testing.assert_array_equal(sample_rate_list1, sample_rate_list2)
assert num_iter == num_samples
def test_speech_commands_exception():
"""
Feature: SpeechCommands Dataset
Description: Test error cases for SpeechCommandsDataset
Expectation: Throw the expected error messages
"""
logger.info("Test error cases for SpeechCommandsDataset.")
error_msg_1 = "sampler and shuffle cannot be specified at the same time."
with pytest.raises(RuntimeError, match=error_msg_1):
ds.SpeechCommandsDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3))
error_msg_2 = "sampler and sharding cannot be specified at the same time."
with pytest.raises(RuntimeError, match=error_msg_2):
ds.SpeechCommandsDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)
error_msg_3 = "num_shards is specified and currently requires shard_id as well."
with pytest.raises(RuntimeError, match=error_msg_3):
ds.SpeechCommandsDataset(DATA_DIR, num_shards=10)
error_msg_4 = "shard_id is specified but num_shards is not."
with pytest.raises(RuntimeError, match=error_msg_4):
ds.SpeechCommandsDataset(DATA_DIR, shard_id=0)
error_msg_5 = "Input shard_id is not within the required interval."
with pytest.raises(ValueError, match=error_msg_5):
ds.SpeechCommandsDataset(DATA_DIR, num_shards=5, shard_id=-1)
with pytest.raises(ValueError, match=error_msg_5):
ds.SpeechCommandsDataset(DATA_DIR, num_shards=5, shard_id=5)
with pytest.raises(ValueError, match=error_msg_5):
ds.SpeechCommandsDataset(DATA_DIR, num_shards=2, shard_id=5)
error_msg_6 = "num_parallel_workers exceeds."
with pytest.raises(ValueError, match=error_msg_6):
ds.SpeechCommandsDataset(DATA_DIR, shuffle=False, num_parallel_workers=0)
with pytest.raises(ValueError, match=error_msg_6):
ds.SpeechCommandsDataset(DATA_DIR, shuffle=False, num_parallel_workers=256)
with pytest.raises(ValueError, match=error_msg_6):
ds.SpeechCommandsDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2)
error_msg_7 = "Argument shard_id."
with pytest.raises(TypeError, match=error_msg_7):
ds.SpeechCommandsDataset(DATA_DIR, num_shards=2, shard_id="0")
def exception_func(item):
raise Exception("Error occur!")
error_msg_8 = "The corresponding data files."
with pytest.raises(RuntimeError, match=error_msg_8):
data = ds.SpeechCommandsDataset(DATA_DIR)
data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1)
for _ in data.__iter__():
pass
with pytest.raises(RuntimeError, match=error_msg_8):
data = ds.SpeechCommandsDataset(DATA_DIR)
data = data.map(operations=exception_func, input_columns=["sample_rate"], num_parallel_workers=1)
for _ in data.__iter__():
pass
def test_speech_commands_usage():
"""
Feature: SpeechCommands Dataset
Description: Usage Test
Expectation: Get the result of each function
"""
logger.info("Test SpeechCommandsDataset usage flag.")
def test_config(usage, speech_commands_path=DATA_DIR):
try:
data = ds.SpeechCommandsDataset(speech_commands_path, usage=usage)
num_rows = 0
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
except (ValueError, TypeError, RuntimeError) as e:
return str(e)
return num_rows
# test the usage of SpeechCommands
assert test_config("test") == 1
assert test_config("train") == 1
assert test_config("valid") == 1
assert test_config("all") == 3
assert "usage is not within the valid set of ['train', 'test', 'valid', 'all']." in test_config("invalid")
# change this directory to the folder that contains all SpeechCommands files
all_speech_commands = None
if all_speech_commands is not None:
assert test_config("test", all_speech_commands) == 11005
assert test_config("valid", all_speech_commands) == 9981
assert test_config("train", all_speech_commands) == 84843
assert test_config("all", all_speech_commands) == 105829
assert ds.SpeechCommandsDataset(all_speech_commands, usage="test").get_dataset_size() == 11005
assert ds.SpeechCommandsDataset(all_speech_commands, usage="valid").get_dataset_size() == 9981
assert ds.SpeechCommandsDataset(all_speech_commands, usage="train").get_dataset_size() == 84843
assert ds.SpeechCommandsDataset(all_speech_commands, usage="all").get_dataset_size() == 105829
def test_speech_commands_pipeline():
"""
Feature: SpeechCommands Dataset
Description: Read a sample and filter the waveform with BandBiquad in a pipeline
Expectation: The data is processed successfully
"""
dataset = ds.SpeechCommandsDataset(DATA_DIR, num_samples=1)
band_biquad_op = audio.BandBiquad(8000, 200.0)
# Filtered waveform by bandbiquad
dataset = dataset.map(input_columns=["waveform"], operations=band_biquad_op, num_parallel_workers=4)
i = 0
for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
i += 1
assert i == 1
if __name__ == '__main__':
test_speech_commands_basic()
test_speech_commands_sequential_sampler()
test_speech_commands_exception()
test_speech_commands_usage()
test_speech_commands_pipeline()