!22712 [assistant][ops] Add FastText

Merge pull request !22712 from 无言/FastText
i-robot 2021-12-01 06:50:59 +00:00 committed by Gitee
commit 0c6505db26
18 changed files with 922 additions and 5 deletions

View File

@ -19,6 +19,7 @@
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/sentence_piece_vocab.h"
#include "minddata/dataset/text/vectors.h"
#include "minddata/dataset/text/vocab.h"
@ -88,6 +89,16 @@ PYBIND_REGISTER(SentencePieceModel, 0, ([](const py::module *m) {
.export_values();
}));
PYBIND_REGISTER(FastText, 1, ([](const py::module *m) {
(void)py::class_<FastText, Vectors, std::shared_ptr<FastText>>(*m, "FastText")
.def(py::init<>())
.def_static("from_file", [](const std::string &path, int32_t max_vectors) {
std::shared_ptr<FastText> fast_text;
THROW_IF_ERROR(FastText::BuildFromFile(&fast_text, path, max_vectors));
return fast_text;
});
}));
PYBIND_REGISTER(Vectors, 0, ([](const py::module *m) {
(void)py::class_<Vectors, std::shared_ptr<Vectors>>(*m, "Vectors")
.def(py::init<>())
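
For context, a minimal sketch of what the registration above exposes (illustrative, not part of the change set): FastText::BuildFromFile surfaces in Python as a static from_file method on the bound FastText class, taking a file path and a max_vectors count, where 0 means "no limit". Users normally go through the mindspore.dataset.text.FastText wrapper added later in this commit; the raw binding call, assuming the usual mindspore._c_dataengine binding module and a placeholder path, looks like:

import mindspore._c_dataengine as cde  # low-level dataset bindings (assumed module path)

# max_vectors=0 mirrors the C++ default of "no limit"; the path is a placeholder.
fast_text = cde.FastText.from_file("/path/to/fast_text.vec", 0)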

View File

@ -630,7 +630,7 @@ class MS_API ToNumber final : public TensorTransform {
};
/// \brief Look up a token into a vector according to the input Vectors table.
class ToVectors final : public TensorTransform {
class MS_API ToVectors final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] vectors A Vectors object.

View File

@ -4,9 +4,10 @@ add_subdirectory(kernels)
file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(text OBJECT
fast_text.cc
sentence_piece_vocab.cc
vectors.cc
vocab.cc
sentence_piece_vocab.cc
)
add_dependencies(text text-kernels)

View File

@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/text/fast_text.h"
#include "utils/file_utils.h"
namespace mindspore {
namespace dataset {
FastText::FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim) : Vectors(map, dim) {}
Status CheckFastText(const std::string &file_path) {
Path path = Path(file_path);
if (path.Exists() && !path.IsDirectory()) {
std::string basename = path.Basename();
size_t dot = basename.rfind('.');
std::string suffix = basename.substr(dot + 1);
if (suffix != "vec") {
RETURN_STATUS_UNEXPECTED("FastText: invalid file, can not find file '*.vec', but got: " + file_path);
}
return Status::OK();
} else {
RETURN_STATUS_UNEXPECTED("FastText: invalid file, failed to open FastText file.");
}
}
Status FastText::BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors) {
RETURN_UNEXPECTED_IF_NULL(fast_text);
RETURN_IF_NOT_OK(CheckFastText(path));
std::unordered_map<std::string, std::vector<float>> map;
int vector_dim = -1;
RETURN_IF_NOT_OK(Load(path, max_vectors, &map, &vector_dim));
*fast_text = std::make_shared<FastText>(std::move(map), vector_dim);
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,55 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/include/dataset/iterator.h"
#include "minddata/dataset/text/vectors.h"
#include "minddata/dataset/util/path.h"
namespace mindspore {
namespace dataset {
/// \brief Pre-trained word vectors.
class FastText : public Vectors {
public:
/// Constructor.
FastText() = default;
/// Constructor.
/// \param[in] map A map between string and vector.
/// \param[in] dim Dimension of the vectors.
FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim);
/// Destructor.
~FastText() = default;
/// \brief Build a FastText object by reading a pre-trained vector file.
/// \param[out] fast_text FastText object which contains the pre-trained vectors.
/// \param[in] path Path to the pre-trained word vector file. The file suffix must be `*.vec`.
/// \param[in] max_vectors This can be used to limit the number of pre-trained vectors loaded (default=0, no limit).
static Status BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors = 0);
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
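
For orientation, a usage sketch of the API declared above (illustrative, not part of the change set): on the Python side BuildFromFile surfaces as FastText.from_file, and the resulting object drives the ToVectors transform. A minimal pipeline example mirroring the tests in this commit, with placeholder paths:

import mindspore.dataset as ds
import mindspore.dataset.text as text

# Build the lookup table from a pre-trained *.vec file (path is a placeholder).
fast_text = text.FastText.from_file("/path/to/fast_text.vec", max_vectors=100)

# Map each token in the "text" column to its vector; tokens missing from the
# table fall back to an all-zero vector by default.
to_vectors = text.ToVectors(fast_text)
data = ds.TextFileDataset("/path/to/words.txt", shuffle=False)
data = data.map(operations=to_vectors, input_columns=["text"])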

View File

@ -30,6 +30,8 @@ namespace dataset {
class Vectors;
class Vocab;
class SentencePieceVocab;
class Vectors;
class Vocab;
// Transform operations for text
namespace text {

View File

@ -28,13 +28,13 @@ import platform
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText
__all__ = [
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
"PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors"
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors", "FastText"
]
if platform.system().lower() != 'windows':

View File

@ -27,7 +27,7 @@ from .validators import check_from_file, check_from_list, check_from_dict, check
check_from_file_vectors
__all__ = [
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors"
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText"
]
@ -411,3 +411,30 @@ class Vectors(cde.Vectors):
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class FastText(cde.FastText):
"""
FastText object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a FastText vector from a file.
Args:
file_path (str): Path of the file that contains the vectors. The suffix of pre-trained vector sets
must be `*.vec`.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
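
A slightly fuller eager-mode sketch based on the tests added later in this commit (illustrative, not part of the change set; the path is a placeholder):

import mindspore.dataset.text as text
import mindspore.dataset.text.transforms as T

fast_text = text.FastText.from_file("/path/to/fast_text.vec")
# Tokens missing from the table map to unk_init (which must match the vector
# dimension); with lower_case_backup=True, unmatched tokens are retried in lower case.
to_vectors = T.ToVectors(fast_text, unk_init=[-1, -1, -1, -1, -1, -1], lower_case_backup=True)
vector = to_vectors("This")  # looked up as "this"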

View File

@ -23,11 +23,13 @@
#include "minddata/dataset/include/dataset/datasets.h"
#include "minddata/dataset/include/dataset/text.h"
#include "minddata/dataset/include/dataset/transforms.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/vectors.h"
#include "minddata/dataset/text/vocab.h"
using namespace mindspore::dataset;
using mindspore::Status;
using mindspore::dataset::FastText;
using mindspore::dataset::ShuffleMode;
using mindspore::dataset::Tensor;
using mindspore::dataset::Vectors;
@ -3943,3 +3945,357 @@ TEST_F(MindDataTestPipeline, TestVectorsWithWrongInfoFile) {
Status s = Vectors::BuildFromFile(&vectors, vectors_dir);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with default parameter in function BuildFromFile and function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextDefaultParam) {
// Test with default parameter.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextDefaultParam.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_EQ(s, Status::OK());
std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text);
EXPECT_NE(lookup, nullptr);
// Create Map operation on ds
ds = ds->Map({lookup}, {"text"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
{0, 0, 0, 0, 0, 0},
{0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
{0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
{0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
{0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
{0, 0, 0, 0, 0, 0}};
while (row.size() != 0) {
auto ind = row["text"];
MS_LOG(INFO) << ind.Shape();
TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
TensorPtr de_expected_item;
dsize_t dim = 6;
ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
mindspore::MSTensor ms_expected_item =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
EXPECT_MSTENSOR_EQ(ind, ms_expected_item);
ASSERT_OK(iter->GetNextRow(&row));
i++;
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
/// Feature: FastText
/// Description: test with all parameters which include `path` and `max_vectors` in function BuildFromFile
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextAllBuildfromfileParams) {
// Test with two parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextAllBuildfromfileParams.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
EXPECT_EQ(s, Status::OK());
std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text);
EXPECT_NE(lookup, nullptr);
// Create Map operation on ds
ds = ds->Map({lookup}, {"text"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
{0, 0, 0, 0, 0, 0},
{0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
{0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
{0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
{0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
{0, 0, 0, 0, 0, 0}};
while (row.size() != 0) {
auto ind = row["text"];
MS_LOG(INFO) << ind.Shape();
TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
TensorPtr de_expected_item;
dsize_t dim = 6;
ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
mindspore::MSTensor ms_expected_item =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
EXPECT_MSTENSOR_EQ(ind, ms_expected_item);
ASSERT_OK(iter->GetNextRow(&row));
i++;
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
/// Feature: FastText
/// Description: test with all parameters in function BuildFromFile and `unknown_init` in function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextUnknownInit) {
// Test with two parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextUnknownInit.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
EXPECT_EQ(s, Status::OK());
std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text, unknown_init);
EXPECT_NE(lookup, nullptr);
// Create Map operation on ds
ds = ds->Map({lookup}, {"text"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
{-1, -1, -1, -1, -1, -1},
{0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
{0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
{0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
{0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
{-1, -1, -1, -1, -1, -1}};
while (row.size() != 0) {
auto ind = row["text"];
MS_LOG(INFO) << ind.Shape();
TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
TensorPtr de_expected_item;
dsize_t dim = 6;
ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
mindspore::MSTensor ms_expected_item =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
EXPECT_MSTENSOR_EQ(ind, ms_expected_item);
ASSERT_OK(iter->GetNextRow(&row));
i++;
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
/// Feature: FastText
/// Description: test with all parameters which include `path` and `max_vectors` in function BuildFromFile and `token`,
/// `unknown_init` and `lower_case_backup` in function Lookup, where some tokens contain uppercase letters
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextAllParams) {
// Test with all parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextAllParams.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_EQ(s, Status::OK());
std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(fast_text, unknown_init, true);
EXPECT_NE(lookup, nullptr);
// Create Map operation on ds
ds = ds->Map({lookup}, {"text"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
std::vector<std::vector<float>> expected = {{0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411},
{-1, -1, -1, -1, -1, -1},
{0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973},
{0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603},
{0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246},
{0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923},
{-1, -1, -1, -1, -1, -1}};
while (row.size() != 0) {
auto ind = row["text"];
MS_LOG(INFO) << ind.Shape();
TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
TensorPtr de_expected_item;
dsize_t dim = 6;
ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({dim}), &de_expected_item));
mindspore::MSTensor ms_expected_item =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
EXPECT_MSTENSOR_EQ(ind, ms_expected_item);
ASSERT_OK(iter->GetNextRow(&row));
i++;
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
/// Feature: FastText
/// Description: test with a pre-trained vector set whose vectors have different dimensions
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextDifferentDimension) {
// Not all tokens' vectors have the same dimension.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextDifferentDimension.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_dim_different.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir, 100);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextMaxVectorsLessThanZero) {
// Test with max_vectors <= 0.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextMaxVectorsLessThanZero.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir, -1);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with a pre-trained vector file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithEmptyFile) {
// Read empty file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithEmptyFile.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_empty.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with a pre-trained vector file that does not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithNotExistFile) {
// Test with a file that does not exist.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithNotExistFile.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/no_fasttext.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with a pre-trained vector set whose info header is not on the first line
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongInfoFile) {
// wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongInfoFile.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fasttext_with_wrong_info.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_NE(s, Status::OK());
}
/// Feature: FastText
/// Description: test with a pre-trained vector file that has a wrong suffix
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongSuffix) {
// Wrong suffix.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongSuffix.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::string vectors_dir = datasets_root_path_ + "/test_fast_text/fast_text.txt";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_NE(s, Status::OK());
}

View File

@ -23,11 +23,13 @@
#include "minddata/dataset/include/dataset/vision.h"
#include "minddata/dataset/include/dataset/audio.h"
#include "minddata/dataset/include/dataset/text.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/vectors.h"
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::dataset::FastText;
using mindspore::dataset::Vectors;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
@ -1665,6 +1667,140 @@ TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParam) {
EXPECT_FALSE(status02.IsOk());
}
/// Feature: FastText
/// Description: test basic usage of FastText and ToVectors with default parameters
/// Expectation: get correct MSTensor
TEST_F(MindDataTestExecute, TestFastTextParam) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestFastTextParam.";
std::shared_ptr<Tensor> de_tensor;
Tensor::CreateScalar<std::string>("ok", &de_tensor);
auto token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
mindspore::MSTensor lookup_result;
// Create expected output.
std::shared_ptr<Tensor> de_expected;
std::vector<float> expected = {0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411};
dsize_t dim = 6;
ASSERT_OK(Tensor::CreateFromVector(expected, TensorShape({dim}), &de_expected));
auto ms_expected = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected));
// Transform params.
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text01;
Status s01 = FastText::BuildFromFile(&fast_text01, vectors_dir);
EXPECT_EQ(s01, Status::OK());
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text01);
auto transform01 = Execute({to_vectors01});
Status status01 = transform01(token, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
EXPECT_TRUE(status01.IsOk());
std::shared_ptr<FastText> fast_text02;
Status s02 = FastText::BuildFromFile(&fast_text02, vectors_dir, 100);
EXPECT_EQ(s02, Status::OK());
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text02);
auto transform02 = Execute({to_vectors02});
Status status02 = transform02(token, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
EXPECT_TRUE(status02.IsOk());
std::shared_ptr<FastText> fast_text03;
Status s03 = FastText::BuildFromFile(&fast_text03, vectors_dir, 3);
EXPECT_EQ(s03, Status::OK());
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(fast_text03);
auto transform03 = Execute({to_vectors03});
Status status03 = transform03(token, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected);
EXPECT_TRUE(status03.IsOk());
}
/// Feature: ToVectors
/// Description: test basic usage of ToVectors and FastText with default parameters
/// Expectation: get correct MSTensor
TEST_F(MindDataTestExecute, TestToVectorsParamForFastText) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsParamForFastText.";
std::shared_ptr<Tensor> de_tensor01;
Tensor::CreateScalar<std::string>("none", &de_tensor01);
auto token01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor01));
std::shared_ptr<Tensor> de_tensor02;
Tensor::CreateScalar<std::string>("ok", &de_tensor02);
auto token02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor02));
std::shared_ptr<Tensor> de_tensor03;
Tensor::CreateScalar<std::string>("OK", &de_tensor03);
auto token03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor03));
mindspore::MSTensor lookup_result;
// Create expected output.
dsize_t dim = 6;
std::shared_ptr<Tensor> de_expected01;
std::vector<float> expected01 = {0, 0, 0, 0, 0, 0};
ASSERT_OK(Tensor::CreateFromVector(expected01, TensorShape({dim}), &de_expected01));
auto ms_expected01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected01));
std::shared_ptr<Tensor> de_expected02;
std::vector<float> expected02 = {-1, -1, -1, -1, -1, -1};
ASSERT_OK(Tensor::CreateFromVector(expected02, TensorShape({dim}), &de_expected02));
auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02));
std::shared_ptr<Tensor> de_expected03;
std::vector<float> expected03 = {0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411};
ASSERT_OK(Tensor::CreateFromVector(expected03, TensorShape({dim}), &de_expected03));
auto ms_expected03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected03));
// Transform params.
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text;
Status s = FastText::BuildFromFile(&fast_text, vectors_dir);
EXPECT_EQ(s, Status::OK());
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text);
auto transform01 = Execute({to_vectors01});
Status status01 = transform01(token01, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected01);
EXPECT_TRUE(status01.IsOk());
std::vector<float> unknown_init = {-1, -1, -1, -1, -1, -1};
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text, unknown_init);
auto transform02 = Execute({to_vectors02});
Status status02 = transform02(token01, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected02);
EXPECT_TRUE(status02.IsOk());
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(fast_text, unknown_init);
auto transform03 = Execute({to_vectors03});
Status status03 = transform03(token02, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected03);
EXPECT_TRUE(status03.IsOk());
std::shared_ptr<TensorTransform> to_vectors04 = std::make_shared<text::ToVectors>(fast_text, unknown_init, true);
auto transform04 = Execute({to_vectors04});
Status status04 = transform04(token03, &lookup_result);
EXPECT_MSTENSOR_EQ(lookup_result, ms_expected03);
EXPECT_TRUE(status04.IsOk());
}
/// Feature: ToVectors
/// Description: test invalid parameter of ToVectors for FastText
/// Expectation: throw exception correctly
TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParamForFastText) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsWithInvalidParamForFastText.";
std::shared_ptr<Tensor> de_tensor;
Tensor::CreateScalar<std::string>("none", &de_tensor);
auto token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
mindspore::MSTensor lookup_result;
// Transform params.
std::string vectors_dir = "data/dataset/test_fast_text/fast_text.vec";
std::shared_ptr<FastText> fast_text01;
Status s = FastText::BuildFromFile(&fast_text01, vectors_dir);
EXPECT_EQ(s, Status::OK());
std::vector<float> unknown_init = {-1, -1, -1, -1};
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(fast_text01, unknown_init);
auto transform01 = Execute({to_vectors01});
Status status01 = transform01(token, &lookup_result);
EXPECT_FALSE(status01.IsOk());
std::shared_ptr<FastText> fast_text02 = nullptr;
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(fast_text02);
auto transform02 = Execute({to_vectors02});
Status status02 = transform02(token, &lookup_result);
EXPECT_FALSE(status02.IsOk());
}
// Feature: DBToAmplitude
// Description: test DBToAmplitude in eager mode
// Expectation: the data is processed successfully

View File

@ -0,0 +1,7 @@
6 6
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
this 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923

View File

@ -0,0 +1,7 @@
6 6
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
this 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923

View File

@ -0,0 +1,7 @@
6 6
ok 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
! 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
this 0.15164 0.30177 -0.16763 0.17684 0.31719
is 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
my 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
home 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923

View File

@ -0,0 +1,7 @@
the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.04445718411
, 0.013441 0.23682 -0.16899 0.40951 0.63812 0.47709
. 0.15164 0.30177 -0.16763 0.17684 0.31719 0.33973
6 6
of 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603
to 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246
and 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923

View File

@ -0,0 +1,7 @@
ok
.
this
is
my
home
.

View File

@ -0,0 +1,7 @@
ok
!
This
iS
my
HOME
.

View File

@ -0,0 +1,237 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import pytest
from mindspore import log
import mindspore.dataset as ds
import mindspore.dataset.text as text
import mindspore.dataset.text.transforms as T
DATASET_ROOT_PATH = "../data/dataset/test_fast_text/"
def test_fast_text_all_build_from_file_params():
"""
Feature: FastText
Description: test with all parameters which include `path` and `max_vectors` in from_file
Expectation: output is equal to the expected value
"""
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=100)
to_vectors = text.ToVectors(vectors)
data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
data = data.map(operations=to_vectors, input_columns=["text"])
ind = 0
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
[0, 0, 0, 0, 0, 0],
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind
ind += 1
def test_fast_text_all_build_from_file_params_eager():
"""
Feature: FastText
Description: test with all parameters which include `path` and `max_vectors` in from_file in eager mode
Expectation: output is equal to the expected value
"""
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=4)
to_vectors = T.ToVectors(vectors)
result1 = to_vectors("ok")
result2 = to_vectors("!")
result3 = to_vectors("this")
result4 = to_vectors("is")
result5 = to_vectors("my")
result6 = to_vectors("home")
result7 = to_vectors("none")
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]]
res_array = np.array(res, dtype=np.float32)
assert np.array_equal(result1, res_array[0])
assert np.array_equal(result2, res_array[1])
assert np.array_equal(result3, res_array[2])
assert np.array_equal(result4, res_array[3])
assert np.array_equal(result5, res_array[4])
assert np.array_equal(result6, res_array[5])
assert np.array_equal(result7, res_array[6])
def test_fast_text_all_to_vectors_params_eager():
"""
Feature: FastText
Description: test with all parameters which include `unk_init` and `lower_case_backup` in function ToVectors
in eager mode
Expectation: output is equal to the expected value
"""
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec", max_vectors=4)
my_unk = [-1, -1, -1, -1, -1, -1]
to_vectors = T.ToVectors(vectors, unk_init=my_unk, lower_case_backup=True)
result1 = to_vectors("Ok")
result2 = to_vectors("!")
result3 = to_vectors("This")
result4 = to_vectors("is")
result5 = to_vectors("my")
result6 = to_vectors("home")
result7 = to_vectors("none")
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
[-1, -1, -1, -1, -1, -1],
[-1, -1, -1, -1, -1, -1],
[-1, -1, -1, -1, -1, -1]]
res_array = np.array(res, dtype=np.float32)
assert np.array_equal(result1, res_array[0])
assert np.array_equal(result2, res_array[1])
assert np.array_equal(result3, res_array[2])
assert np.array_equal(result4, res_array[3])
assert np.array_equal(result5, res_array[4])
assert np.array_equal(result6, res_array[5])
assert np.array_equal(result7, res_array[6])
def test_fast_text_build_from_file():
"""
Feature: FastText
Description: test with only default parameter
Expectation: output is equal to the expected value
"""
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec")
to_vectors = text.ToVectors(vectors)
data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
data = data.map(operations=to_vectors, input_columns=["text"])
ind = 0
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
[0, 0, 0, 0, 0, 0],
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind
ind += 1
def test_fast_text_build_from_file_eager():
"""
Feature: FastText
Description: test with only default parameter in eager mode
Expectation: output is equal to the expected value
"""
vectors = text.FastText.from_file(DATASET_ROOT_PATH + "fast_text.vec")
to_vectors = T.ToVectors(vectors)
result1 = to_vectors("ok")
result2 = to_vectors("!")
result3 = to_vectors("this")
result4 = to_vectors("is")
result5 = to_vectors("my")
result6 = to_vectors("home")
result7 = to_vectors("none")
res = [[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.04445718411],
[0.013441, 0.23682, -0.16899, 0.40951, 0.63812, 0.47709],
[0.15164, 0.30177, -0.16763, 0.17684, 0.31719, 0.33973],
[0.70853, 0.57088, -0.4716, 0.18048, 0.54449, 0.72603],
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
res_array = np.array(res, dtype=np.float32)
assert np.array_equal(result1, res_array[0])
assert np.array_equal(result2, res_array[1])
assert np.array_equal(result3, res_array[2])
assert np.array_equal(result4, res_array[3])
assert np.array_equal(result5, res_array[4])
assert np.array_equal(result6, res_array[5])
assert np.array_equal(result7, res_array[6])
def test_fast_text_invalid_input():
"""
Feature: FastText
Description: test the validate function with invalid parameters
Expectation: output is equal to the expected error
"""
def test_invalid_input(test_name, file_path, error, error_msg, max_vectors=None, unk_init=None,
lower_case_backup=False, token="ok"):
log.info("Test FastText with wrong input: {0}".format(test_name))
with pytest.raises(error) as error_info:
vectors = text.FastText.from_file(file_path, max_vectors=max_vectors)
to_vectors = T.ToVectors(vectors, unk_init=unk_init, lower_case_backup=lower_case_backup)
to_vectors(token)
assert error_msg in str(error_info.value)
test_invalid_input("Not all vectors have the same number of dimensions",
DATASET_ROOT_PATH + "fast_text_dim_different.vec", error=RuntimeError,
error_msg="all vectors must have the same number of dimensions, " \
"but got dim 5 while expecting 6")
test_invalid_input("the file is empty.", DATASET_ROOT_PATH + "fast_text_empty.vec",
error=RuntimeError, error_msg="invalid file, file is empty.")
test_invalid_input("the count of `unknown_init`'s element is different with word vector.",
DATASET_ROOT_PATH + "fast_text.vec",
error=RuntimeError,
error_msg="unk_init must be the same length as vectors, but got unk_init",
unk_init=[-1, -1])
test_invalid_input("The file not exist", DATASET_ROOT_PATH + "not_exist.vec", RuntimeError,
error_msg="FastText: invalid file")
test_invalid_input("The token is 1-dimensional", DATASET_ROOT_PATH + "fast_text_with_wrong_info.vec",
error=RuntimeError, error_msg="token with 1-dimensional vector.")
test_invalid_input("max_vectors parameter must be greater than 0", DATASET_ROOT_PATH + "fast_text.vec",
error=ValueError, error_msg="Input max_vectors is not within the required interval",
max_vectors=-1)
test_invalid_input("invalid max_vectors parameter type as a float", DATASET_ROOT_PATH + "fast_text.vec",
error=TypeError, error_msg="Argument max_vectors with value 1.0 is not of type [<class 'int'>],"
" but got <class 'float'>.", max_vectors=1.0)
test_invalid_input("invalid max_vectors parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
error=TypeError, error_msg="Argument max_vectors with value 1 is not of type [<class 'int'>],"
" but got <class 'str'>.", max_vectors="1")
test_invalid_input("invalid token parameter type as a float", DATASET_ROOT_PATH + "fast_text.vec",
error=RuntimeError, error_msg="input tensor type should be string.", token=1.0)
test_invalid_input("invalid lower_case_backup parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
error=TypeError, error_msg="Argument lower_case_backup with value True is " \
"not of type [<class 'bool'>],"
" but got <class 'str'>.", lower_case_backup="True")
test_invalid_input("invalid lower_case_backup parameter type as a string", DATASET_ROOT_PATH + "fast_text.vec",
error=TypeError, error_msg="Argument lower_case_backup with value True is " \
"not of type [<class 'bool'>],"
" but got <class 'str'>.", lower_case_backup="True")
test_invalid_input("the suffix of pre-training set must be `*.vec`", DATASET_ROOT_PATH + "fast_text.txt",
error=RuntimeError, error_msg="FastText: invalid file, can not find file '*.vec'")
if __name__ == '__main__':
test_fast_text_all_build_from_file_params()
test_fast_text_all_build_from_file_params_eager()
test_fast_text_all_to_vectors_params_eager()
test_fast_text_build_from_file()
test_fast_text_build_from_file_eager()
test_fast_text_invalid_input()