forked from mindspore-Ecosystem/mindspore
!22712 [assistant][ops] Add FastText
Merge pull request !22712 from 无言/FastText
This commit is contained in:
commit
0c6505db26
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include "minddata/dataset/api/python/pybind_register.h"
|
#include "minddata/dataset/api/python/pybind_register.h"
|
||||||
#include "minddata/dataset/include/dataset/constants.h"
|
#include "minddata/dataset/include/dataset/constants.h"
|
||||||
|
#include "minddata/dataset/text/fast_text.h"
|
||||||
#include "minddata/dataset/text/sentence_piece_vocab.h"
|
#include "minddata/dataset/text/sentence_piece_vocab.h"
|
||||||
#include "minddata/dataset/text/vectors.h"
|
#include "minddata/dataset/text/vectors.h"
|
||||||
#include "minddata/dataset/text/vocab.h"
|
#include "minddata/dataset/text/vocab.h"
|
||||||
|
@ -88,6 +89,16 @@ PYBIND_REGISTER(SentencePieceModel, 0, ([](const py::module *m) {
|
||||||
.export_values();
|
.export_values();
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Expose FastText to Python as a subclass of Vectors. Instances are created via the
// static from_file factory rather than a Python-side constructor with arguments.
PYBIND_REGISTER(FastText, 1, ([](const py::module *m) {
                  (void)py::class_<FastText, Vectors, std::shared_ptr<FastText>>(*m, "FastText")
                    .def(py::init<>())
                    .def_static("from_file", [](const std::string &path, int32_t max_vectors) {
                      std::shared_ptr<FastText> fast_text;
                      // THROW_IF_ERROR converts a non-OK Status from BuildFromFile into a Python exception.
                      THROW_IF_ERROR(FastText::BuildFromFile(&fast_text, path, max_vectors));
                      return fast_text;
                    });
                }));
|
||||||
|
|
||||||
PYBIND_REGISTER(Vectors, 0, ([](const py::module *m) {
|
PYBIND_REGISTER(Vectors, 0, ([](const py::module *m) {
|
||||||
(void)py::class_<Vectors, std::shared_ptr<Vectors>>(*m, "Vectors")
|
(void)py::class_<Vectors, std::shared_ptr<Vectors>>(*m, "Vectors")
|
||||||
.def(py::init<>())
|
.def(py::init<>())
|
||||||
|
|
|
@ -630,7 +630,7 @@ class MS_API ToNumber final : public TensorTransform {
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief Look up a token into an vector according to the input Vectors table.
|
/// \brief Look up a token into an vector according to the input Vectors table.
|
||||||
class ToVectors final : public TensorTransform {
|
class MS_API ToVectors final : public TensorTransform {
|
||||||
public:
|
public:
|
||||||
/// \brief Constructor.
|
/// \brief Constructor.
|
||||||
/// \param[in] vectors A Vectors object.
|
/// \param[in] vectors A Vectors object.
|
||||||
|
|
|
@ -4,9 +4,10 @@ add_subdirectory(kernels)
|
||||||
file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
# Sources kept in alphabetical order; sentence_piece_vocab.cc was listed twice in the
# merged result — a duplicate source entry — and is listed exactly once here.
add_library(text OBJECT
        fast_text.cc
        sentence_piece_vocab.cc
        vectors.cc
        vocab.cc
        )

add_dependencies(text text-kernels)
|
|
@ -0,0 +1,50 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "minddata/dataset/text/fast_text.h"
|
||||||
|
|
||||||
|
#include "utils/file_utils.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
// Construct from a prebuilt token -> embedding map; storage and lookup are delegated to Vectors.
FastText::FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim) : Vectors(map, dim) {}
|
||||||
|
|
||||||
|
// Validate that `file_path` names an existing regular file with a ".vec" suffix.
// Returns OK on success; otherwise an error Status describing the failure.
Status CheckFastText(const std::string &file_path) {
  Path path = Path(file_path);
  if (!path.Exists() || path.IsDirectory()) {
    RETURN_STATUS_UNEXPECTED("FastText: invalid file, failed to open FastText file.");
  }
  std::string basename = path.Basename();
  size_t dot = basename.rfind('.');
  // rfind returns npos when there is no '.'; the original substr(npos + 1) == substr(0)
  // would wrongly accept a file literally named "vec". Reject extension-less names explicitly.
  if (dot == std::string::npos || basename.substr(dot + 1) != "vec") {
    RETURN_STATUS_UNEXPECTED("FastText: invalid file, can not find file '*.vec', but got: " + file_path);
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Build a FastText table by reading a pre-trained "*.vec" file.
// `fast_text` receives the constructed object; `max_vectors` bounds how many rows are loaded.
Status FastText::BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors) {
  // Output pointer must be valid before any work is done.
  RETURN_UNEXPECTED_IF_NULL(fast_text);
  // Reject paths that do not exist, are directories, or lack the ".vec" suffix.
  RETURN_IF_NOT_OK(CheckFastText(path));
  std::unordered_map<std::string, std::vector<float>> map;
  int vector_dim = -1;  // Filled in by Load from the file contents.
  RETURN_IF_NOT_OK(Load(path, max_vectors, &map, &vector_dim));
  *fast_text = std::make_shared<FastText>(std::move(map), vector_dim);
  return Status::OK();
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,55 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/core/tensor.h"
|
||||||
|
#include "minddata/dataset/include/dataset/iterator.h"
|
||||||
|
#include "minddata/dataset/text/vectors.h"
|
||||||
|
#include "minddata/dataset/util/path.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief Pre-trained word vectors in FastText ".vec" format.
class FastText : public Vectors {
 public:
  /// \brief Default constructor; creates an empty vector table.
  FastText() = default;

  /// \brief Constructor.
  /// \param[in] map A map between string tokens and their embedding vectors.
  /// \param[in] dim Dimension of the vectors.
  FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim);

  /// \brief Destructor.
  ~FastText() = default;

  /// \brief Build a FastText object by reading a pre-trained vector file.
  /// \param[out] fast_text FastText object which contains the pre-trained vectors.
  /// \param[in] path Path to the pre-trained word vector file. The suffix of the file must be `*.vec`.
  /// \param[in] max_vectors This can be used to limit the number of pre-trained vectors loaded (default=0, no limit).
  /// \return Status error code, returns OK if no error is encountered.
  static Status BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors = 0);
};
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
|
|
@ -30,6 +30,8 @@ namespace dataset {
|
||||||
// Forward declarations for types referenced by the transform declarations below.
// The merged result declared Vectors and Vocab twice; each is declared exactly once here
// (redundant forward declarations are legal C++ but add noise).
class Vectors;
class Vocab;
class SentencePieceVocab;
|
||||||
|
|
||||||
// Transform operations for text
|
// Transform operations for text
|
||||||
namespace text {
|
namespace text {
|
||||||
|
|
|
@ -28,13 +28,13 @@ import platform
|
||||||
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
|
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
|
||||||
TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
|
TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
|
||||||
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
||||||
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors
|
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
|
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
|
||||||
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
|
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
|
||||||
"PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
|
"PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
|
||||||
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors"
|
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors", "FastText"
|
||||||
]
|
]
|
||||||
|
|
||||||
if platform.system().lower() != 'windows':
|
if platform.system().lower() != 'windows':
|
||||||
|
|
|
@ -27,7 +27,7 @@ from .validators import check_from_file, check_from_list, check_from_dict, check
|
||||||
check_from_file_vectors
|
check_from_file_vectors
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors"
|
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -411,3 +411,30 @@ class Vectors(cde.Vectors):
|
||||||
|
|
||||||
max_vectors = max_vectors if max_vectors is not None else 0
|
max_vectors = max_vectors if max_vectors is not None else 0
|
||||||
return super().from_file(file_path, max_vectors)
|
return super().from_file(file_path, max_vectors)
|
||||||
|
|
||||||
|
|
||||||
|
class FastText(cde.FastText):
    """
    FastText object that is used to map tokens into vectors.
    """

    @classmethod
    @check_from_file_vectors
    def from_file(cls, file_path, max_vectors=None):
        """
        Build a FastText vector from a file.

        Args:
            file_path (str): Path of the file that contains the vectors. The suffix of the pre-trained
                vector sets must be `*.vec`.
            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                situations where the entire set doesn't fit in memory, or is not needed for another reason,
                passing max_vectors can limit the size of the loaded set (default=None, no limit).

        Examples:
            >>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
        """
        # The C++ side uses 0 to mean "no limit"; map Python's None to that sentinel.
        max_vectors = max_vectors if max_vectors is not None else 0
        return super().from_file(file_path, max_vectors)
|
||||||
|
|
|
@ -23,11 +23,13 @@
|
||||||
#include "minddata/dataset/include/dataset/datasets.h"
|
#include "minddata/dataset/include/dataset/datasets.h"
|
||||||
#include "minddata/dataset/include/dataset/text.h"
|
#include "minddata/dataset/include/dataset/text.h"
|
||||||
#include "minddata/dataset/include/dataset/transforms.h"
|
#include "minddata/dataset/include/dataset/transforms.h"
|
||||||
|
#include "minddata/dataset/text/fast_text.h"
|
||||||
#include "minddata/dataset/text/vectors.h"
|
#include "minddata/dataset/text/vectors.h"
|
||||||
#include "minddata/dataset/text/vocab.h"
|
#include "minddata/dataset/text/vocab.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset;
|
using namespace mindspore::dataset;
|
||||||
using mindspore::Status;
|
using mindspore::Status;
|
||||||
|
using mindspore::dataset::FastText;
|
||||||
using mindspore::dataset::ShuffleMode;
|
using mindspore::dataset::ShuffleMode;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
using mindspore::dataset::Vectors;
|
using mindspore::dataset::Vectors;
|
||||||
|
@ -3943,3 +3945,357 @@ TEST_F(MindDataTestPipeline, TestVectorsWithWrongInfoFile) {
|
||||||
Status s = Vectors::BuildFromFile(&vectors, vectors_dir);
|
Status s = Vectors::BuildFromFile(&vectors, vectors_dir);
|
||||||
EXPECT_NE(s, Status::OK());
|
EXPECT_NE(s, Status::OK());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with default parameter in function BuildFromFile and function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextDefaultParam) {
  // Test with default parameter.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextDefaultParam.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_EQ(s, Status::OK());

  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  // Expected embedding per input token; the all-zero rows are out-of-vocabulary tokens
  // mapped through ToVectors' default unknown handling.
  std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
                                              {0, 0, 0, 0, 0, 0},
                                              {0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
                                              {0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
                                              {0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
                                              {0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
                                              {0, 0, 0, 0, 0, 0}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    dsize_t dim = 6;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // All 7 rows of words.txt should have been consumed.
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextAllBuildfromfileParams) {
  // Test with two parameters.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextAllBuildfromfileParams.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
  std::shared_ptr<FastText> fast_text;
  // max_vectors=100 is larger than the fixture, so the full set is loaded.
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
  EXPECT_EQ(s, Status::OK());

  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  // Same expected values as the default-parameter test; all-zero rows are OOV tokens.
  std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
                                              {0, 0, 0, 0, 0, 0},
                                              {0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
                                              {0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
                                              {0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
                                              {0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
                                              {0, 0, 0, 0, 0, 0}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    dsize_t dim = 6;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // All 7 rows should have been consumed.
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with all parameters in function BuildFromFile and `unknown_init` in function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextUnknownInit) {
  // Test with two parameters.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextUnknownInit.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
  EXPECT_EQ(s, Status::OK());

  // OOV tokens should be mapped to this vector instead of zeros.
  std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text, unknown_init);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  // Rows of all -1 correspond to OOV tokens resolved through unknown_init.
  std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
                                              {-1, -1, -1, -1, -1, -1},
                                              {0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
                                              {0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
                                              {0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
                                              {0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
                                              {-1, -1, -1, -1, -1, -1}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    dsize_t dim = 6;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // All 7 rows should have been consumed.
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with all parameters which include `path` and `max_vectors` in function BuildFromFile and `token`,
/// `unknown_init` and `lower_case_backup` in function Lookup. But some tokens have some big letters
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextAllParams) {
  // Test with all parameters.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextAllParams.";
  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_EQ(s, Status::OK());

  std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
  // lower_case_backup=true: uppercase tokens fall back to their lowercase form before lookup.
  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text, unknown_init, true);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  // Rows of all -1 correspond to OOV tokens resolved through unknown_init.
  std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
                                              {-1, -1, -1, -1, -1, -1},
                                              {0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
                                              {0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
                                              {0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
                                              {0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
                                              {-1, -1, -1, -1, -1, -1}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    dsize_t dim = 6;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // All 7 rows should have been consumed.
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with pre-vectors set that have the different dimension
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextDifferentDimension) {
  // Tokens don't have the same number of vectors.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextDifferentDimension.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Fixture whose rows do not all share the same vector dimension; loading must fail.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_dim_different.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextMaxVectorsLessThanZero) {
  // Test with max_vectors <= 0.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextMaxVectorsLessThanZero.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // A negative max_vectors is invalid and must be rejected.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir, -1);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with the pre-vectors file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithEmptyFile) {
  // Read empty file.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithEmptyFile.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // An empty vector file must be rejected.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_empty.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with the pre-vectors file that is not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithNotExistFile) {
  // Test with not exist file.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithNotExistFile.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // A path that does not exist must be rejected.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/no_fasttext.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with the pre-vectors set that has a situation that info-head is not the first line in the set
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongInfoFile) {
  // wrong info.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongInfoFile.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Fixture whose info header line is misplaced; loading must fail.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_with_wrong_info.vec";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
||||||
|
/// Feature: FastText
/// Description: test with the pre-vectors set that has a wrong suffix
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongSuffix) {
  // wrong info.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongSuffix.";

  // Create a TextFile dataset
  // NOTE(review): `ds` is never consumed by this negative test; kept for parity with the positive tests.
  std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // A ".txt" suffix (not ".vec") must be rejected by the suffix check.
  std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.txt";
  std::shared_ptr<FastText> fast_text;
  Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
  EXPECT_NE(s, Status::OK());
}
|
||||||
|
|
|
@ -23,11 +23,13 @@
|
||||||
#include "minddata/dataset/include/dataset/vision.h"
|
#include "minddata/dataset/include/dataset/vision.h"
|
||||||
#include "minddata/dataset/include/dataset/audio.h"
|
#include "minddata/dataset/include/dataset/audio.h"
|
||||||
#include "minddata/dataset/include/dataset/text.h"
|
#include "minddata/dataset/include/dataset/text.h"
|
||||||
|
#include "minddata/dataset/text/fast_text.h"
|
||||||
#include "minddata/dataset/text/vectors.h"
|
#include "minddata/dataset/text/vectors.h"
|
||||||
#include "utils/log_adapter.h"
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset;
|
using namespace mindspore::dataset;
|
||||||
using mindspore::LogStream;
|
using mindspore::LogStream;
|
||||||
|
using mindspore::dataset::FastText;
|
||||||
using mindspore::dataset::Vectors;
|
using mindspore::dataset::Vectors;
|
||||||
using mindspore::ExceptionType::NoExceptionType;
|
using mindspore::ExceptionType::NoExceptionType;
|
||||||
using mindspore::MsLogLevel::INFO;
|
using mindspore::MsLogLevel::INFO;
|
||||||
|
@ -1665,6 +1667,140 @@ TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParam) {
|
||||||
EXPECT_FALSE(status02.IsOk());
|
EXPECT_FALSE(status02.IsOk());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Feature: FastText
|
||||||
|
/// Description: test basic usage of FastText and the ToVectors with default parameter
|
||||||
|
/// Expectation: get correct MSTensor
|
||||||
|
TEST_F(MindDataTestExecute, TestFastTextParam) {
|
||||||
|
MS_LOG(INFO) << "Doing MindDataTestExecute-TestFastTextParam.";
|
||||||
|
std::shared_ptr<Tensor> de_tensor;
|
||||||
|
Tensor::CreateScalar<std::string>("ok", &de_tensor);
|
||||||
|
auto token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
|
||||||
|
mindspore::MSTensor lookup_result;
|
||||||
|
|
||||||
|
// Create expected output.
|
||||||
|
std::shared_ptr<Tensor> de_expected;
|
||||||
|
std::vector<float> expected = {0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411};
|
||||||
|
dsize_t dim = 6;
|
||||||
|
ASSERT_OK(Tensor::CreateFromVector(expected, TensorShape({dim}), &de_expected));
|
||||||
|
auto ms_expected = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected));
|
||||||
|
|
||||||
|
// Transform params.
|
||||||
|
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
|
||||||
|
std::shared_ptr<FastText> fast_text01;
|
||||||
|
Status s01 = FastText::BuildFromFile(&fast_text01, vectors_dir);
|
||||||
|
EXPECT_EQ(s01, Status::OK());
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text01);
|
||||||
|
auto transform01 = Execute({to_vectors01});
|
||||||
|
Status status01 = transform01(token, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
|
||||||
|
EXPECT_TRUE(status01.IsOk());
|
||||||
|
|
||||||
|
std::shared_ptr<FastText> fast_text02;
|
||||||
|
Status s02 = FastText::BuildFromFile(&fast_text02, vectors_dir, 100);
|
||||||
|
EXPECT_EQ(s02, Status::OK());
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text02);
|
||||||
|
auto transform02 = Execute({to_vectors02});
|
||||||
|
Status status02 = transform02(token, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
|
||||||
|
EXPECT_TRUE(status02.IsOk());
|
||||||
|
|
||||||
|
std::shared_ptr<FastText> fast_text03;
|
||||||
|
Status s03 = FastText::BuildFromFile(&fast_text03, vectors_dir, 3);
|
||||||
|
EXPECT_EQ(s03, Status::OK());
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(fast_text03);
|
||||||
|
auto transform03 = Execute({to_vectors03});
|
||||||
|
Status status03 = transform03(token, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
|
||||||
|
EXPECT_TRUE(status03.IsOk());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Feature: ToVectors
|
||||||
|
/// Description: test basic usage of ToVectors and the FastText with default parameter
|
||||||
|
/// Expectation: get correct MSTensor
|
||||||
|
TEST_F(MindDataTestExecute, TestToVectorsParamForFastText) {
|
||||||
|
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsParamForFastText.";
|
||||||
|
std::shared_ptr<Tensor> de_tensor01;
|
||||||
|
Tensor::CreateScalar<std::string>("none", &de_tensor01);
|
||||||
|
auto token01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor01));
|
||||||
|
std::shared_ptr<Tensor> de_tensor02;
|
||||||
|
Tensor::CreateScalar<std::string>("ok", &de_tensor02);
|
||||||
|
auto token02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor02));
|
||||||
|
std::shared_ptr<Tensor> de_tensor03;
|
||||||
|
Tensor::CreateScalar<std::string>("OK", &de_tensor03);
|
||||||
|
auto token03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor03));
|
||||||
|
mindspore::MSTensor lookup_result;
|
||||||
|
|
||||||
|
// Create expected output.
|
||||||
|
dsize_t dim = 6;
|
||||||
|
std::shared_ptr<Tensor> de_expected01;
|
||||||
|
std::vector<float> expected01 = {0, 0, 0, 0, 0, 0};
|
||||||
|
ASSERT_OK(Tensor::CreateFromVector(expected01, TensorShape({dim}), &de_expected01));
|
||||||
|
auto ms_expected01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected01));
|
||||||
|
std::shared_ptr<Tensor> de_expected02;
|
||||||
|
std::vector<float> expected02 = {-1, -1, -1, -1, -1, -1};
|
||||||
|
ASSERT_OK(Tensor::CreateFromVector(expected02, TensorShape({dim}), &de_expected02));
|
||||||
|
auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02));
|
||||||
|
std::shared_ptr<Tensor> de_expected03;
|
||||||
|
std::vector<float> expected03 = {0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411};
|
||||||
|
ASSERT_OK(Tensor::CreateFromVector(expected03, TensorShape({dim}), &de_expected03));
|
||||||
|
auto ms_expected03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected03));
|
||||||
|
|
||||||
|
// Transform params.
|
||||||
|
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
|
||||||
|
std::shared_ptr<FastText> fast_text;
|
||||||
|
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
|
||||||
|
EXPECT_EQ(s, Status::OK());
|
||||||
|
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text);
|
||||||
|
auto transform01 = Execute({to_vectors01});
|
||||||
|
Status status01 = transform01(token01, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected01);
|
||||||
|
EXPECT_TRUE(status01.IsOk());
|
||||||
|
std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text, unknown_init);
|
||||||
|
auto transform02 = Execute({to_vectors02});
|
||||||
|
Status status02 = transform02(token01, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected02);
|
||||||
|
EXPECT_TRUE(status02.IsOk());
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(fast_text, unknown_init);
|
||||||
|
auto transform03 = Execute({to_vectors03});
|
||||||
|
Status status03 = transform03(token02, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected03);
|
||||||
|
EXPECT_TRUE(status03.IsOk());
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors04 = std::make_shared<text::ToVectors>(fast_text, unknown_init, true);
|
||||||
|
auto transform04 = Execute({to_vectors04});
|
||||||
|
Status status04 = transform04(token03, &lookup_result);
|
||||||
|
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected03);
|
||||||
|
EXPECT_TRUE(status04.IsOk());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Feature: ToVectors
|
||||||
|
/// Description: test invalid parameter of ToVectors for FastText
|
||||||
|
/// Expectation: throw exception correctly
|
||||||
|
TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParamForFastText) {
|
||||||
|
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsWithInvalidParamForFastText.";
|
||||||
|
std::shared_ptr<Tensor> de_tensor;
|
||||||
|
Tensor::CreateScalar<std::string>("none", &de_tensor);
|
||||||
|
auto token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
|
||||||
|
mindspore::MSTensor lookup_result;
|
||||||
|
|
||||||
|
// Transform params.
|
||||||
|
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
|
||||||
|
std::shared_ptr<FastText> fast_text01;
|
||||||
|
Status s = FastText::BuildFromFile(&fast_text01, vectors_dir);
|
||||||
|
EXPECT_EQ(s, Status::OK());
|
||||||
|
std::vector<float> unknown_init = {-1, -1, -1, -1};
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text01, unknown_init);
|
||||||
|
auto transform01 = Execute({to_vectors01});
|
||||||
|
Status status01 = transform01(token, &lookup_result);
|
||||||
|
EXPECT_FALSE(status01.IsOk());
|
||||||
|
std::shared_ptr<FastText> fast_text02 = nullptr;
|
||||||
|
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text02);
|
||||||
|
auto transform02 = Execute({to_vectors02});
|
||||||
|
Status status02 = transform02(token, &lookup_result);
|
||||||
|
EXPECT_FALSE(status02.IsOk());
|
||||||
|
}
|
||||||
|
|
||||||
// Feature: DBToAmplitude
|
// Feature: DBToAmplitude
|
||||||
// Description: test DBToAmplitude in eager mode
|
// Description: test DBToAmplitude in eager mode
|
||||||
// Expectation: the data is processed successfully
|
// Expectation: the data is processed successfully
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
6 6
|
||||||
|
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
|
||||||
|
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
|
||||||
|
this 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
|
||||||
|
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
|
||||||
|
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
|
||||||
|
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923
|
|
@ -0,0 +1,7 @@
|
||||||
|
6 6
|
||||||
|
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
|
||||||
|
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
|
||||||
|
this 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
|
||||||
|
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
|
||||||
|
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
|
||||||
|
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923
|
|
@ -0,0 +1,7 @@
|
||||||
|
6 6
|
||||||
|
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
|
||||||
|
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
|
||||||
|
this 0.15164 0.30177 -0.16763 0.17684 0.31719
|
||||||
|
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
|
||||||
|
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
|
||||||
|
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923
|
|
@ -0,0 +1,7 @@
|
||||||
|
the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
|
||||||
|
, 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
|
||||||
|
. 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
|
||||||
|
6 6
|
||||||
|
of 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
|
||||||
|
to 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
|
||||||
|
and 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923
|
|
@ -0,0 +1,7 @@
|
||||||
|
ok
|
||||||
|
.
|
||||||
|
this
|
||||||
|
is
|
||||||
|
my
|
||||||
|
home
|
||||||
|
.
|
|
@ -0,0 +1,7 @@
|
||||||
|
ok
|
||||||
|
!
|
||||||
|
This
|
||||||
|
iS
|
||||||
|
my
|
||||||
|
HOME
|
||||||
|
.
|
|
@ -0,0 +1,237 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from mindspore import log
|
||||||
|
import mindspore.dataset as ds
|
||||||
|
import mindspore.dataset.text as text
|
||||||
|
import mindspore.dataset.text.transforms as T
|
||||||
|
|
||||||
|
DATASET_ROOT_PATH = "../data/dataset/test_fast_text/"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_all_build_from_file_params():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile
|
||||||
|
Expectation: output is equal to the expected value
|
||||||
|
"""
|
||||||
|
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=100)
|
||||||
|
to_vectors = text.ToVectors(vectors)
|
||||||
|
data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
|
||||||
|
data = data.map(operations=to_vectors, input_columns=["text"])
|
||||||
|
ind = 0
|
||||||
|
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
|
||||||
|
[0, 0, 0, 0, 0, 0],
|
||||||
|
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
|
||||||
|
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
|
||||||
|
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
|
||||||
|
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
|
||||||
|
[0, 0, 0, 0, 0, 0]]
|
||||||
|
print(data)
|
||||||
|
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||||
|
res_array = np.array(res[ind], dtype=np.float32)
|
||||||
|
assert np.array_equal(res_array, d["text"]), ind
|
||||||
|
ind += 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_all_build_from_file_params_eager():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile in eager mode
|
||||||
|
Expectation: output is equal to the expected value
|
||||||
|
"""
|
||||||
|
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=4)
|
||||||
|
to_vectors = T.ToVectors(vectors)
|
||||||
|
result1 = to_vectors("ok")
|
||||||
|
result2 = to_vectors("!")
|
||||||
|
result3 = to_vectors("this")
|
||||||
|
result4 = to_vectors("is")
|
||||||
|
result5 = to_vectors("my")
|
||||||
|
result6 = to_vectors("home")
|
||||||
|
result7 = to_vectors("none")
|
||||||
|
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
|
||||||
|
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
|
||||||
|
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
|
||||||
|
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
|
||||||
|
[0, 0, 0, 0, 0, 0],
|
||||||
|
[0, 0, 0, 0, 0, 0],
|
||||||
|
[0, 0, 0, 0, 0, 0]]
|
||||||
|
res_array = np.array(res, dtype=np.float32)
|
||||||
|
|
||||||
|
assert np.array_equal(result1, res_array[0])
|
||||||
|
assert np.array_equal(result2, res_array[1])
|
||||||
|
assert np.array_equal(result3, res_array[2])
|
||||||
|
assert np.array_equal(result4, res_array[3])
|
||||||
|
assert np.array_equal(result5, res_array[4])
|
||||||
|
assert np.array_equal(result6, res_array[5])
|
||||||
|
assert np.array_equal(result7, res_array[6])
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_all_to_vectors_params_eager():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test with all parameters which include `unk_init` and `lower_case_backup` in function ToVectors
|
||||||
|
in eager mode
|
||||||
|
Expectation: output is equal to the expected value
|
||||||
|
"""
|
||||||
|
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=4)
|
||||||
|
my_unk = [-1, -1, -1, -1, -1, -1]
|
||||||
|
to_vectors = T.ToVectors(vectors, unk_init=my_unk, lower_case_backup=True)
|
||||||
|
result1 = to_vectors("Ok")
|
||||||
|
result2 = to_vectors("!")
|
||||||
|
result3 = to_vectors("This")
|
||||||
|
result4 = to_vectors("is")
|
||||||
|
result5 = to_vectors("my")
|
||||||
|
result6 = to_vectors("home")
|
||||||
|
result7 = to_vectors("none")
|
||||||
|
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
|
||||||
|
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
|
||||||
|
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
|
||||||
|
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
|
||||||
|
[-1, -1, -1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1, -1, -1]]
|
||||||
|
res_array = np.array(res, dtype=np.float32)
|
||||||
|
|
||||||
|
assert np.array_equal(result1, res_array[0])
|
||||||
|
assert np.array_equal(result2, res_array[1])
|
||||||
|
assert np.array_equal(result3, res_array[2])
|
||||||
|
assert np.array_equal(result4, res_array[3])
|
||||||
|
assert np.array_equal(result5, res_array[4])
|
||||||
|
assert np.array_equal(result6, res_array[5])
|
||||||
|
assert np.array_equal(result7, res_array[6])
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_build_from_file():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test with only default parameter
|
||||||
|
Expectation: output is equal to the expected value
|
||||||
|
"""
|
||||||
|
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec")
|
||||||
|
to_vectors = text.ToVectors(vectors)
|
||||||
|
data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
|
||||||
|
data = data.map(operations=to_vectors, input_columns=["text"])
|
||||||
|
ind = 0
|
||||||
|
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
|
||||||
|
[0, 0, 0, 0, 0, 0],
|
||||||
|
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
|
||||||
|
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
|
||||||
|
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
|
||||||
|
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
|
||||||
|
[0, 0, 0, 0, 0, 0]]
|
||||||
|
print(data)
|
||||||
|
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||||
|
res_array = np.array(res[ind], dtype=np.float32)
|
||||||
|
assert np.array_equal(res_array, d["text"]), ind
|
||||||
|
ind += 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_build_from_file_eager():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test with only default parameter in eager mode
|
||||||
|
Expectation: output is equal to the expected value
|
||||||
|
"""
|
||||||
|
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec")
|
||||||
|
to_vectors = T.ToVectors(vectors)
|
||||||
|
result1 = to_vectors("ok")
|
||||||
|
result2 = to_vectors("!")
|
||||||
|
result3 = to_vectors("this")
|
||||||
|
result4 = to_vectors("is")
|
||||||
|
result5 = to_vectors("my")
|
||||||
|
result6 = to_vectors("home")
|
||||||
|
result7 = to_vectors("none")
|
||||||
|
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
|
||||||
|
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
|
||||||
|
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
|
||||||
|
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
|
||||||
|
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
|
||||||
|
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
|
||||||
|
[0, 0, 0, 0, 0, 0]]
|
||||||
|
res_array = np.array(res, dtype=np.float32)
|
||||||
|
|
||||||
|
assert np.array_equal(result1, res_array[0])
|
||||||
|
assert np.array_equal(result2, res_array[1])
|
||||||
|
assert np.array_equal(result3, res_array[2])
|
||||||
|
assert np.array_equal(result4, res_array[3])
|
||||||
|
assert np.array_equal(result5, res_array[4])
|
||||||
|
assert np.array_equal(result6, res_array[5])
|
||||||
|
assert np.array_equal(result7, res_array[6])
|
||||||
|
|
||||||
|
|
||||||
|
def test_fast_text_invalid_input():
|
||||||
|
"""
|
||||||
|
Feature: FastText
|
||||||
|
Description: test the validate function with invalid parameters
|
||||||
|
Expectation: output is equal to the expected error
|
||||||
|
"""
|
||||||
|
def test_invalid_input(test_name, file_path, error, error_msg, max_vectors=None, unk_init=None,
|
||||||
|
lower_case_backup=False, token="ok"):
|
||||||
|
log.info("Test Vectors with wrong input: {0}".format(test_name))
|
||||||
|
with pytest.raises(error) as error_info:
|
||||||
|
vectors = text.FastText.from_file(file_path, max_vectors=max_vectors)
|
||||||
|
to_vectors = T.ToVectors(vectors, unk_init=unk_init, lower_case_backup=lower_case_backup)
|
||||||
|
to_vectors(token)
|
||||||
|
assert error_msg in str(error_info.value)
|
||||||
|
|
||||||
|
test_invalid_input("Not all vectors have the same number of dimensions",
|
||||||
|
DATASET_ROOT_PATH + "fast_text_dim_different.vec", error=RuntimeError,
|
||||||
|
error_msg="all vectors must have the same number of dimensions, " \
|
||||||
|
"but got dim 5 while expecting 6")
|
||||||
|
test_invalid_input("the file is empty.", DATASET_ROOT_PATH + "fast_text_empty.vec",
|
||||||
|
error=RuntimeError, error_msg="invalid file, file is empty.")
|
||||||
|
test_invalid_input("the count of `unknown_init`'s element is different with word vector.",
|
||||||
|
DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=RuntimeError,
|
||||||
|
error_msg="unk_init must be the same length as vectors, but got unk_init",
|
||||||
|
unk_init=[-1, -1])
|
||||||
|
test_invalid_input("The file not exist", DATASET_ROOT_PATH + "not_exist.vec", RuntimeError,
|
||||||
|
error_msg="FastText: invalid file")
|
||||||
|
test_invalid_input("The token is 1-dimensional", DATASET_ROOT_PATH + "fast_text_with_wrong_info.vec",
|
||||||
|
error=RuntimeError, error_msg="token with 1-dimensional vector.")
|
||||||
|
test_invalid_input("max_vectors parameter must be greater than 0", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=ValueError, error_msg="Input max_vectors is not within the required interval",
|
||||||
|
max_vectors=-1)
|
||||||
|
test_invalid_input("invalid max_vectors parameter type as a float", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=TypeError, error_msg="Argument max_vectors with value 1.0 is not of type [<class 'int'>],"
|
||||||
|
" but got <class 'float'>.", max_vectors=1.0)
|
||||||
|
test_invalid_input("invalid max_vectors parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=TypeError, error_msg="Argument max_vectors with value 1 is not of type [<class 'int'>],"
|
||||||
|
" but got <class 'str'>.", max_vectors="1")
|
||||||
|
test_invalid_input("invalid token parameter type as a float", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=RuntimeError, error_msg="input tensor type should be string.", token=1.0)
|
||||||
|
test_invalid_input("invalid lower_case_backup parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=TypeError, error_msg="Argument lower_case_backup with value True is " \
|
||||||
|
"not of type [<class 'bool'>],"
|
||||||
|
" but got <class 'str'>.", lower_case_backup="True")
|
||||||
|
test_invalid_input("invalid lower_case_backup parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
|
||||||
|
error=TypeError, error_msg="Argument lower_case_backup with value True is " \
|
||||||
|
"not of type [<class 'bool'>],"
|
||||||
|
" but got <class 'str'>.", lower_case_backup="True")
|
||||||
|
test_invalid_input("the suffix of pre-training set must be `*.vec`", DATASET_ROOT_PATH + "fast_text.txt",
|
||||||
|
error=RuntimeError, error_msg="FastText: invalid file, can not find file '*.vec'")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
test_fast_text_all_build_from_file_params()
|
||||||
|
test_fast_text_all_build_from_file_params_eager()
|
||||||
|
test_fast_text_all_to_vectors_params_eager()
|
||||||
|
test_fast_text_build_from_file()
|
||||||
|
test_fast_text_build_from_file_eager()
|
||||||
|
test_fast_text_invalid_input()
|
Loading…
Reference in New Issue