forked from mindspore-Ecosystem/mindspore
Modify VOCDataset doc & Fix BuildVocab bug
This commit is contained in:
parent
e06dc0a946
commit
8aba39a71b
|
@ -324,7 +324,7 @@ std::shared_ptr<Vocab> Dataset::BuildVocab(const std::vector<std::string> &colum
|
||||||
// Finish building vocab by triggering GetNextRow
|
// Finish building vocab by triggering GetNextRow
|
||||||
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
|
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
|
||||||
iter->GetNextRow(&row);
|
iter->GetNextRow(&row);
|
||||||
if (vocab == nullptr) {
|
if (vocab->vocab().empty()) {
|
||||||
MS_LOG(ERROR) << "Fail to build vocab.";
|
MS_LOG(ERROR) << "Fail to build vocab.";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,6 +107,9 @@ class Vocab {
|
||||||
// @param std::string & word - word to be added; it is skipped if the word already exists
|
// @param std::string & word - word to be added; it is skipped if the word already exists
|
||||||
void append_word(const std::string &word);
|
void append_word(const std::string &word);
|
||||||
|
|
||||||
|
// return a read-only vocab
|
||||||
|
// @return const reference to the internal word-to-id map; it is only valid while this Vocab is alive
//         (returned by reference so that callers, e.g. an emptiness check, do not copy the whole map)
const std::unordered_map<WordType, WordIdType> &vocab() { return word2id_; }
|
||||||
|
|
||||||
// destructor
|
// destructor
|
||||||
~Vocab() = default;
|
~Vocab() = default;
|
||||||
|
|
||||||
|
|
|
@ -4456,8 +4456,8 @@ class VOCDataset(MappableDataset):
|
||||||
task (str): Set the task type for reading VOC data; currently only "Segmentation" and "Detection" are supported
|
task (str): Set the task type for reading VOC data; currently only "Segmentation" and "Detection" are supported
|
||||||
(default="Segmentation").
|
(default="Segmentation").
|
||||||
mode (str): Set the data list txt file to be read (default="train").
|
mode (str): Set the data list txt file to be read (default="train").
|
||||||
class_indexing (dict, optional): A str-to-int mapping from label name to index
|
class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
|
||||||
(default=None, the folder names will be sorted alphabetically and each
|
"Detection" task (default=None, the folder names will be sorted alphabetically and each
|
||||||
class will be given a unique index starting from 0).
|
class will be given a unique index starting from 0).
|
||||||
num_samples (int, optional): The number of images to be included in the dataset
|
num_samples (int, optional): The number of images to be included in the dataset
|
||||||
(default=None, all images).
|
(default=None, all images).
|
||||||
|
|
|
@ -252,3 +252,17 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail2) {
|
||||||
std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true);
|
std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true);
|
||||||
EXPECT_EQ(vocab, nullptr);
|
EXPECT_EQ(vocab, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that Dataset::BuildVocab returns nullptr when asked to build a vocab from a column the dataset does not have.
TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail3) {
|
||||||

MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabFromDatasetFail3.";
|
||||||


||||||

// Create a TextFile dataset from testVocab/words.txt with shuffling disabled
|
||||||

std::string data_file = datasets_root_path_ + "/testVocab/words.txt";
|
||||||

std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
|
||||||

EXPECT_NE(ds, nullptr);
|
||||||


||||||

// Try to build a vocab from the dataset using a column name it does not contain
|
||||||

// Expected failure: "ColumnNotExist" is not a column of ds, so BuildVocab must return nullptr
|
||||||

std::shared_ptr<Vocab> vocab = ds->BuildVocab({"ColumnNotExist"});
|
||||||

EXPECT_EQ(vocab, nullptr);
|
||||||

}
|
||||||
|
|
Loading…
Reference in New Issue