Initial commit for album
Added linter fix for album dataset
Added testDataset
Adding signature
Added JsonDataset example API
Example dataset
Resolving format
More fixing
Refactor
Small fix
Added compiling album dataset
Running tests
Added linter fix #1
Passing UT
Added dataset API
Addressing clang
Clang part 2
Fixing pass
Fixed tree check
lint fix
Added lint fix part 2
build.sh
@@ -393,7 +393,7 @@ build_mindspore()
        CMAKE_VERBOSE="--verbose"
    fi
    cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
    echo "success to build mindspore project!"
    echo "success building mindspore project!"
}

checkndk() {

@@ -21,6 +21,7 @@
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/engine/dataset_iterator.h"
// Source dataset headers (in alphabetical order)
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/clue_op.h"

@@ -117,6 +118,15 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file) {
// FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS
// (In alphabetical order)

// Function to create an AlbumDataset.
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
                                    const std::vector<std::string> &column_names, bool decode,
                                    const std::shared_ptr<SamplerObj> &sampler) {
  auto ds = std::make_shared<AlbumDataset>(dataset_dir, data_schema, column_names, decode, sampler);

  return ds->ValidateParams() ? ds : nullptr;
}

// Function to create a CelebADataset.
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type,
                                      const std::shared_ptr<SamplerObj> &sampler, bool decode,
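
For context, a minimal caller-side sketch of the Album factory added above. This is illustrative only: the directory, schema path, column names, and the enclosing namespace are assumptions, not part of this commit.

// Illustrative usage only; paths and column names are hypothetical,
// and the namespace is assumed from the surrounding C++ dataset API.
using mindspore::dataset::api::Album;
auto ds = Album("/path/to/album", "/path/to/album_schema.json", {"image", "label", "id"});
if (ds == nullptr) {
  // ValidateParams() rejected the dataset directory or the schema file path.
}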

@@ -687,6 +697,49 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
// (In alphabetical order)

// Constructor for AlbumDataset
AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
                           const std::vector<std::string> &column_names, bool decode,
                           const std::shared_ptr<SamplerObj> &sampler)
    : dataset_dir_(dataset_dir),
      schema_path_(data_schema),
      column_names_(column_names),
      decode_(decode),
      sampler_(sampler) {}

bool AlbumDataset::ValidateParams() {
  if (!ValidateDatasetDirParam("AlbumDataset", dataset_dir_)) {
    return false;
  }

  if (!ValidateDatasetFilesParam("AlbumDataset", {schema_path_})) {
    return false;
  }

  return true;
}

// Function to build AlbumDataset
std::vector<std::shared_ptr<DatasetOp>> AlbumDataset::Build() {
  // A vector containing shared pointers to the DatasetOps that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;

  // If the user does not specify a Sampler, create a default sampler, i.e., RandomSampler.
  if (sampler_ == nullptr) {
    sampler_ = CreateDefaultSampler();
  }

  auto schema = std::make_unique<DataSchema>();
  RETURN_EMPTY_IF_ERROR(schema->LoadSchemaFile(schema_path_, column_names_));

  // Argument that is not exposed to the user in the API.
  std::set<std::string> extensions = {};

  node_ops.push_back(std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
                                               decode_, extensions, std::move(schema), std::move(sampler_->Build())));
  return node_ops;
}

// Constructor for CelebADataset
CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
                             const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
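
Build() hands the column definitions to DataSchema::LoadSchemaFile() before constructing the AlbumOp. A hypothetical album_schema.json is sketched below as a C++ raw string; the exact key set is whatever DataSchema accepts, and the column names and types here are illustrative, not mandated by this commit.

// Hypothetical schema content, for illustration only.
const char *kAlbumSchemaExample = R"({
  "columns": {
    "image": {"type": "uint8", "rank": 1},
    "label": {"type": "int32", "rank": 1},
    "id":    {"type": "int64", "rank": 0}
  }
})";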

@@ -13,6 +13,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
    text_file_op.cc
    clue_op.cc
    csv_op.cc
    album_op.cc
    )

set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES

@@ -0,0 +1,508 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include <fstream>
#include <iomanip>
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/db_connector.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/engine/opt/pass.h"
#include "minddata/dataset/kernels/image/image_utils.h"

namespace mindspore {
namespace dataset {
AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr), builder_schema_file_("") {
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  builder_num_workers_ = cfg->num_parallel_workers();
  builder_rows_per_buffer_ = cfg->rows_per_buffer();
  builder_op_connector_size_ = cfg->op_connector_size();
}

Status AlbumOp::Builder::Build(std::shared_ptr<AlbumOp> *ptr) {
  RETURN_IF_NOT_OK(SanityCheck());
  if (builder_sampler_ == nullptr) {
    int64_t num_samples = 0;  // default num_samples of 0 means to sample the entire set of data
    int64_t start_index = 0;
    builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples);
  }

  builder_schema_ = std::make_unique<DataSchema>();
  Path schema_file(builder_schema_file_);
  if (builder_schema_file_ == "" || !schema_file.Exists()) {
    RETURN_STATUS_UNEXPECTED("Schema not provided");
  } else {
    MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << ".";
    builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_);
  }
  *ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
                                   builder_op_connector_size_, builder_decode_, builder_extensions_,
                                   std::move(builder_schema_), std::move(builder_sampler_));
  return Status::OK();
}

Status AlbumOp::Builder::SanityCheck() {
  Path dir(builder_dir_);
  std::string err_msg;
  err_msg += dir.IsDirectory() == false ? "Album path is invalid or not set\n" : "";
  err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers is set to 0 or less\n" : "";
  return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg);
}
AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
                 const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
                 std::shared_ptr<Sampler> sampler)
    : ParallelOp(num_wkrs, queue_size),
      rows_per_buffer_(rows_per_buffer),
      folder_path_(file_dir),
      decode_(do_decode),
      extensions_(exts),
      data_schema_(std::move(data_schema)),
      sampler_(std::move(sampler)),
      row_cnt_(0),
      buf_cnt_(0),
      sampler_ind_(0),
      dirname_offset_(0) {
  // Set the column name map (base class field)
  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
    column_name_id_map_[data_schema_->column(i).name()] = i;
  }
  io_block_queues_.Init(num_workers_, queue_size);
}

// Helper function for string comparison
// Returns true if string a is alphabetically less than string b,
// quite similar to the strcmp operation
bool StrComp(const std::string &a, const std::string &b) {
  return a < b;
}

// Single thread that goes through the folder directory and gets all file names,
// calculates numRows, then returns
Status AlbumOp::PrescanEntry() {
  Path folder(folder_path_);
  dirname_offset_ = folder_path_.length();
  std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
  if (folder.Exists() == false || dirItr == nullptr) {
    RETURN_STATUS_UNEXPECTED("Error unable to open: " + folder_path_);
  }
  MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";

  while (dirItr->hasNext()) {
    Path file = dirItr->next();
    if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
      (void)image_rows_.push_back(file.toString().substr(dirname_offset_));
    } else {
      MS_LOG(INFO) << "Album operator unsupported file found: " << file.toString()
                   << ", extension: " << file.Extension() << ".";
    }
  }

  std::sort(image_rows_.begin(), image_rows_.end(), StrComp);
  num_rows_ = image_rows_.size();
  return Status::OK();
}

// Main logic: register the queue with the TaskGroup, launch all threads, and do the functor's work
Status AlbumOp::operator()() {
  RETURN_IF_NOT_OK(this->PrescanEntry());
  RETURN_IF_NOT_OK(LaunchThreadsAndInitOp());
  std::unique_ptr<DataBuffer> sampler_buffer;
  RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
  while (true) {  // each iteration is 1 epoch
    std::vector<int64_t> keys;
    keys.reserve(rows_per_buffer_);
    while (sampler_buffer->eoe() == false) {
      TensorRow sample_row;
      RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
      std::shared_ptr<Tensor> sample_ids = sample_row[0];
      for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
        if ((*itr) >= num_rows_) continue;  // index out of bound, skipping
        keys.push_back(*itr);
        row_cnt_++;
        if (row_cnt_ % rows_per_buffer_ == 0) {
          RETURN_IF_NOT_OK(
            io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone)));
          keys.clear();
        }
      }
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    if (keys.empty() == false) {
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone)));
    }
    if (IsLastIteration()) {
      std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe);
      std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof);
      RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block)));
      RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block)));
      for (int32_t i = 0; i < num_workers_; ++i) {
        RETURN_IF_NOT_OK(
          io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)));
      }
      return Status::OK();
    } else {  // not the last repeat. Sleep master thread, wait for the wake-up from reset
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
      RETURN_IF_NOT_OK(wp_.Wait());  // Master thread goes to sleep after it has made all the IOBlocks
      wp_.Clear();
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    UpdateRepeatAndEpochCounter();
  }
}
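
The master loop above groups sampled row ids into fixed-size IOBlocks and deals them round-robin onto the per-worker queues. A standalone sketch of just that batching arithmetic, with plain std containers standing in for the queue list (all names here are illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int num_workers = 2, rows_per_buffer = 3;
  std::vector<std::vector<std::vector<int64_t>>> queues(num_workers);  // one queue of blocks per worker
  std::vector<int64_t> keys;
  int64_t row_cnt = 0, buf_cnt = 0;
  for (int64_t id = 0; id < 8; ++id) {  // pretend the sampler produced ids 0..7
    keys.push_back(id);
    if (++row_cnt % rows_per_buffer == 0) {  // block is full: hand it to the next worker
      queues[buf_cnt++ % num_workers].push_back(keys);
      keys.clear();
    }
  }
  if (!keys.empty()) queues[buf_cnt++ % num_workers].push_back(keys);  // flush the tail block
  // Worker 0 gets blocks {0,1,2} and {6,7}; worker 1 gets {3,4,5}.
  for (int w = 0; w < num_workers; ++w) std::cout << "worker " << w << ": " << queues[w].size() << " blocks\n";
  return 0;
}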
// Contains the main logic of pulling an IOBlock from the IOBlockQueue, loading a buffer, and pushing the buffer
// to out_connector_
// IMPORTANT: 1 IOBlock produces 1 DataBuffer
Status AlbumOp::WorkerEntry(int32_t worker_id) {
  TaskManager::FindMe()->Post();
  int64_t buffer_id = worker_id;
  std::unique_ptr<IOBlock> io_block;
  RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block));
  while (io_block != nullptr) {
    if (io_block->eoe() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)));
      buffer_id = worker_id;
    } else if (io_block->eof() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
    } else {
      std::vector<int64_t> keys;
      RETURN_IF_NOT_OK(io_block->GetKeys(&keys));
      if (keys.empty() == true) return Status::OK();  // empty key is a quit signal for workers
      std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone);
      RETURN_IF_NOT_OK(LoadBuffer(keys, &db));
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db)));
      buffer_id += num_workers_;
    }
    RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block));
  }
  RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker");
}

// Only JPEG/PNG/GIF/BMP are supported
// Optimization: could take in a tensor
Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
  std::ifstream file_handle;
  constexpr int read_num = 3;
  *valid = false;
  file_handle.open(file_name, std::ios::binary | std::ios::in);
  if (!file_handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Cannot open image file " + file_name);
  }
  unsigned char file_type[read_num];
  (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);

  if (file_handle.fail()) {
    file_handle.close();
    RETURN_STATUS_UNEXPECTED("Failed to read image file " + file_name);
  }
  file_handle.close();
  if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
    // Normal JPEGs start with \xff\xd8\xff\xe0,
    // JPEG with EXIF starts with \xff\xd8\xff\xe1.
    // Use \xff\xd8\xff to cover both.
    *valid = true;
  } else if (file_type[0] == 0x89 && file_type[1] == 0x50 && file_type[2] == 0x4e) {
    // It's a PNG
    *valid = true;
  } else if (file_type[0] == 0x47 && file_type[1] == 0x49 && file_type[2] == 0x46) {
    // It's a GIF
    *valid = true;
  } else if (file_type[0] == 0x42 && file_type[1] == 0x4d) {
    // It's a BMP
    *valid = true;
  }
  return Status::OK();
}
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
  std::shared_ptr<Tensor> image;
  std::ifstream fs;
  fs.open(image_file_path, std::ios::binary | std::ios::in);
  if (fs.fail()) {
    MS_LOG(INFO) << "Image file not found: " << image_file_path << ".";
    // If the file doesn't exist, we don't flag this as an error in the input check; simply skip
    return Status::OK();
  }

  MS_LOG(INFO) << "Image file found: " << image_file_path << ".";

  // Check that the file is an image before decoding
  bool valid = false;
  RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
  RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
  if (decode_ && valid) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
      std::string err = "Failed to decode image: " + image_file_path;
      RETURN_STATUS_UNEXPECTED(err);
    }
  }
  row->push_back(std::move(image));
  return Status::OK();
}

Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::vector<std::string> data = json_obj;

  MS_LOG(INFO) << "String array label found: " << data << ".";
  std::shared_ptr<Tensor> label;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
  row->push_back(std::move(label));
  return Status::OK();
}

Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::string data = json_obj;

  MS_LOG(INFO) << "String label found: " << data << ".";
  std::shared_ptr<Tensor> label;
  RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, &label));
  row->push_back(std::move(label));
  return Status::OK();
}

Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::shared_ptr<Tensor> label;
  // consider templating this function to handle all int types
  if (data_schema_->column(col_num).type() == DataType(DataType::DE_INT64)) {
    std::vector<int64_t> data;

    // Iterate over the integer list and add those values to the output tensor
    auto items = json_obj.items();
    using it_type = decltype(items.begin());
    (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });

    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
  } else if (data_schema_->column(col_num).type() == DataType(DataType::DE_INT32)) {
    std::vector<int32_t> data;

    // Iterate over the integer list and add those values to the output tensor
    auto items = json_obj.items();
    using it_type = decltype(items.begin());
    (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });

    MS_LOG(INFO) << "Int array found: " << data << ".";
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
  } else {
    RETURN_STATUS_UNEXPECTED("Error in Load Int Tensor");
  }
  row->push_back(std::move(label));
  return Status::OK();
}

Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row) {
  if (data_schema_->column(col_num).type() == DataType(DataType::DE_STRING)) {
    std::shared_ptr<Tensor> id;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id));
    row->push_back(std::move(id));
    return Status::OK();
  }
  // hack to get the file name without the extension; the 1 is to get rid of the leading separator character
  int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str());
  std::shared_ptr<Tensor> id;
  RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, &id));
  MS_LOG(INFO) << "File ID " << image_id << ".";
  row->push_back(std::move(id));
  return Status::OK();
}

Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) {
  // Append an empty placeholder tensor of the column's declared type
  std::shared_ptr<Tensor> empty_tensor;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({}), data_schema_->column(col_num).type(), &empty_tensor));
  row->push_back(std::move(empty_tensor));
  return Status::OK();
}

// Loads a tensor with a float value. There is an issue with float64: we don't have a reverse lookup to the type,
// so we actually have to check what type we want to fill the tensor with.
// Float64 doesn't work with reinterpret cast here. Otherwise we would limit the float in the schema to
// only be float32, which seems like a weird limitation to impose.
Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::shared_ptr<Tensor> float_tensor;
  if (data_schema_->column(col_num).type() == DataType(DataType::DE_FLOAT64)) {
    double data = json_obj;
    MS_LOG(INFO) << "double found: " << json_obj << ".";
    RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor));
  } else if (data_schema_->column(col_num).type() == DataType(DataType::DE_FLOAT32)) {
    float data = json_obj;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, &float_tensor));
    MS_LOG(INFO) << "float found: " << json_obj << ".";
  }
  row->push_back(std::move(float_tensor));
  return Status::OK();
}
// Load 1 TensorRow (image, label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer
// possible optimization: the helper functions of LoadTensorRow could be optimized
// to take a reference to a column descriptor
Status AlbumOp::LoadTensorRow(const std::string &file, TensorRow *row) {
  (*row) = {};
  MS_LOG(INFO) << "Image row file: " << file << ".";

  std::ifstream file_handle(folder_path_ + file);
  if (!file_handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Json file " + folder_path_ + file + " cannot be opened.");
  }
  std::string line;
  while (getline(file_handle, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      MS_LOG(INFO) << "This Line: " << line << ".";

      // note: given a schema, we have to iterate over all column descriptors in the schema and check for each key
      // get the number of columns in the schema:
      int32_t columns = data_schema_->NumColumns();

      // loop over each column descriptor; this could be optimized with switch cases
      for (int32_t i = 0; i < columns; i++) {
        // special case to handle
        if (data_schema_->column(i).name() == "id") {
          // id is internal, special case to load from file
          RETURN_IF_NOT_OK(LoadIDTensor(file, i, row));
          continue;
        }
        // check whether the key exists; insert a placeholder if not found
        if (js.find(data_schema_->column(i).name()) == js.end()) {
          // iterator not found, push an empty tensor as a placeholder
          MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
          RETURN_IF_NOT_OK(LoadEmptyTensor(i, row));
          continue;
        }
        nlohmann::json column_value = js.at(data_schema_->column(i).name());
        MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
        bool is_array = column_value.is_array();
        // load single string
        if (column_value.is_string() && data_schema_->column(i).type() == DataType(DataType::DE_STRING)) {
          RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, row));
          continue;
        }
        // load string array
        if (is_array && data_schema_->column(i).type() == DataType(DataType::DE_STRING)) {
          RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, row));
          continue;
        }
        // load image file
        if (column_value.is_string() && data_schema_->column(i).type() != DataType(DataType::DE_STRING)) {
          std::string image_file_path = column_value;
          RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, row));
          continue;
        }
        // load float
        if (!is_array && (data_schema_->column(i).type() == DataType(DataType::DE_FLOAT32) ||
                          data_schema_->column(i).type() == DataType(DataType::DE_FLOAT64))) {
          RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, row));
          continue;
        }
        // load int array
        if (is_array && (data_schema_->column(i).type() == DataType(DataType::DE_INT64) ||
                         data_schema_->column(i).type() == DataType(DataType::DE_INT32))) {
          RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, row));
          continue;
        } else {
          MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
          continue;
        }
      }
    } catch (const std::exception &err) {
      file_handle.close();
      RETURN_STATUS_UNEXPECTED("Parse JSON file failed");
    }
  }
  file_handle.close();
  return Status::OK();
}
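
To make the dispatch above concrete, here is a sketch of a per-image JSON file this loader could consume, one JSON object per line. The keys and values are hypothetical (they match the illustrative schema earlier, not anything mandated by this commit):

#include <nlohmann/json.hpp>
#include <iostream>

int main() {
  // One album row, as LoadTensorRow() would see it after nlohmann::json::parse(line).
  // "image" holds a path string with a non-string schema type, so it would route to
  // LoadImageTensor(); "label" is an int array, so it would route to LoadIntArrayTensor().
  const char *line = R"({"image": "/images/0001.jpg", "label": [3, 7]})";
  nlohmann::json js = nlohmann::json::parse(line);
  std::cout << js.at("image").get<std::string>() << " has "
            << js.at("label").size() << " labels\n";
  return 0;
}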
// Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer
Status AlbumOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) {
  std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>();
  TensorRow trow;

  for (const int64_t &key : keys) {
    RETURN_IF_NOT_OK(this->LoadTensorRow(image_rows_[key], &trow));
    deq->push_back(std::move(trow));
  }
  (*db)->set_tensor_table(std::move(deq));
  return Status::OK();
}

void AlbumOp::Print(std::ostream &out, bool show_all) const {
  // Always show the id and name as the first line, regardless of whether this is a summary or detailed print
  out << "(" << std::setw(2) << operator_id_ << ") <AlbumOp>:";
  if (!show_all) {
    // Call the super class for displaying any common 1-liner info
    ParallelOp::Print(out, show_all);
    // Then show any custom derived-internal 1-liner info for this op
    out << "\n";
  } else {
    // Call the super class for displaying any common detailed info
    ParallelOp::Print(out, show_all);
    // Then show any custom derived-internal stuff
    out << "\nNumber of rows: " << num_rows_ << "\nAlbum directory: " << folder_path_ << "\n\n";
  }
}

// Reset the Sampler and wake up the master thread (functor)
Status AlbumOp::Reset() {
  RETURN_IF_NOT_OK(sampler_->ResetSampler());
  row_cnt_ = 0;
  wp_.Set();  // wake up master thread after reset is done
  return Status::OK();
}

// Handshake with the Sampler; allow the Sampler to call RandomAccessOp's functions to get NumRows
Status AlbumOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}

Status AlbumOp::LaunchThreadsAndInitOp() {
  RETURN_UNEXPECTED_IF_NULL(tree_);
  // registers the QueueList and individual Queues for interrupt services
  RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
  // launch main workers that load DataBuffers by reading all images
  RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&AlbumOp::WorkerEntry, this, std::placeholders::_1)));
  TaskManager::FindMe()->Post();
  RETURN_IF_NOT_OK(this->InitSampler());  // pass numRows to the Sampler
  return Status::OK();
}

// Visitor accept method for NodePass
Status AlbumOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
  return p->RunOnNode(shared_from_base<AlbumOp>(), modified);
}

Status AlbumOp::ComputeColMap() {
  // Set the column name map (base class field)
  if (column_name_id_map_.empty()) {
    for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
      column_name_id_map_[data_schema_->column(i).name()] = i;
    }
  } else {
    MS_LOG(WARNING) << "Column name map is already set!";
  }
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore

@@ -0,0 +1,298 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_

#include <deque>
#include <memory>
#include <queue>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include <unordered_map>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/parallel_op.h"
#include "minddata/dataset/engine/datasetops/source/io_block.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/queue.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/wait_post.h"

namespace mindspore {
namespace dataset {
// Forward declares
template <typename T>
class Queue;

// Define row information as a list of file objects to read
using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>;

/// \class AlbumOp album_op.h
class AlbumOp : public ParallelOp, public RandomAccessOp {
 public:
  class Builder {
   public:
    /// \brief Constructor for the Builder class of AlbumOp
    Builder();

    /// \brief Destructor.
    ~Builder() = default;

    /// \brief Setter method
    /// \param[in] rows_per_buffer
    /// \return Builder setter method returns reference to the builder
    Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
      builder_rows_per_buffer_ = rows_per_buffer;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] size
    /// \return Builder setter method returns reference to the builder
    Builder &SetOpConnectorSize(int32_t size) {
      builder_op_connector_size_ = size;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] exts - file extensions to be read
    /// \return Builder setter method returns reference to the builder
    Builder &SetExtensions(const std::set<std::string> &exts) {
      builder_extensions_ = exts;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] do_decode
    /// \return Builder setter method returns reference to the builder
    Builder &SetDecode(bool do_decode) {
      builder_decode_ = do_decode;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] num_workers
    /// \return Builder setter method returns reference to the builder
    Builder &SetNumWorkers(int32_t num_workers) {
      builder_num_workers_ = num_workers;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] sampler
    /// \return Builder setter method returns reference to the builder
    Builder &SetSampler(std::shared_ptr<Sampler> sampler) {
      builder_sampler_ = std::move(sampler);
      return *this;
    }

    /// \brief Setter method
    /// \param[in] dir - dataset directory
    /// \return Builder setter method returns reference to the builder
    Builder &SetAlbumDir(const std::string &dir) {
      builder_dir_ = dir;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] file - schema file to load
    /// \return Builder setter method returns reference to the builder
    Builder &SetSchemaFile(const std::string &file) {
      builder_schema_file_ = file;
      return *this;
    }

    /// \brief Setter method
    /// \param[in] columns - input columns
    /// \return Builder setter method returns reference to the builder
    Builder &SetColumnsToLoad(const std::vector<std::string> &columns) {
      builder_columns_to_load_ = columns;
      return *this;
    }

    /// \brief Check validity of input args
    /// \return Status - The error code returned
    Status SanityCheck();

    /// \brief The builder "build" method creates the final object.
    /// \param[inout] op - The constructed DatasetOp
    /// \return Status - The error code returned
    Status Build(std::shared_ptr<AlbumOp> *op);

   private:
    bool builder_decode_;
    std::vector<std::string> builder_columns_to_load_;
    std::string builder_dir_;
    std::string builder_schema_file_;
    int32_t builder_num_workers_;
    int32_t builder_rows_per_buffer_;
    int32_t builder_op_connector_size_;
    std::set<std::string> builder_extensions_;
    std::shared_ptr<Sampler> builder_sampler_;
    std::unique_ptr<DataSchema> builder_schema_;
  };

  /// \brief Constructor
  /// \param[in] num_wkrs - Number of workers reading images in parallel
  /// \param[in] rows_per_buffer - Number of images (rows) in each buffer
  /// \param[in] file_dir - directory of the Album
  /// \param[in] queue_size - connector size
  /// \param[in] do_decode - decode image files
  /// \param[in] exts - set of file extensions to read; if empty, read everything under the dir
  /// \param[in] data_schema - schema of the dataset
  /// \param[in] sampler - sampler that tells AlbumOp what to read
  AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
          const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler);

  /// \brief Destructor.
  ~AlbumOp() = default;

  /// \brief Initialize AlbumOp-related variables; calls the function that walks all files
  /// \return Status - The error code returned
  Status PrescanEntry();

  /// \brief Worker thread pulls a number of IOBlocks from the IOBlock Queue, makes a buffer and pushes it to the Connector
  /// \param[in] worker_id - id of each worker
  /// \return Status - The error code returned
  Status WorkerEntry(int32_t worker_id) override;

  /// \brief Main Loop of AlbumOp
  ///     Master thread: Fill the IOBlockQueue, then go to sleep
  ///     Worker thread: pulls an IOBlock from the IOBlockQueue, works on it, then puts a buffer on the out connector
  /// \return Status - The error code returned
  Status operator()() override;

  /// \brief A print method typically used for debugging
  /// \param[in] out
  /// \param[in] show_all
  void Print(std::ostream &out, bool show_all) const override;

  /// \brief Check if the image is valid. Only JPEG/PNG/GIF/BMP are supported.
  ///     This function could be optimized to return the tensor to reduce opening/closing files
  /// \return Status - The error code returned
  Status CheckImageType(const std::string &file_name, bool *valid);

  // Base-class override for NodePass visitor acceptor.
  // @param p - Pointer to the NodePass to be accepted.
  // @param modified - Whether this node visit modified the pipeline.
  // @return - Status of the node visit.
  Status Accept(NodePass *p, bool *modified) override;

  // Op name getter
  // @return Name of the current Op
  std::string Name() const override { return "AlbumOp"; }

 private:
  /// \brief Initialize the Sampler; calls sampler->Init() within
  /// \return Status The error code returned
  Status InitSampler();

  /// \brief Load an image to a tensor row
  /// \param[in] image_file Image name of the file
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row);

  /// \brief Load a vector of ints to a tensor, append the tensor to the tensor row
  /// \param[in] json_obj Json object containing the multi-dimensional label
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);

  /// \brief Load a string array into a tensor, append the tensor to the tensor row
  /// \param[in] json_obj Json object containing the string tensor
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);

  /// \brief Load a string into a tensor, append the tensor to the tensor row
  /// \param[in] json_obj Json object containing the string tensor
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);

  /// \brief Load a float value to the tensor row
  /// \param[in] json_obj Json object containing the float
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);

  /// \brief Load an empty tensor to the tensor row
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadEmptyTensor(uint32_t col_num, TensorRow *row);

  /// \brief Load the id from the file name to the tensor row
  /// \param[in] file The file name to get the ID from
  /// \param[in] col_num Column number in the schema
  /// \param[inout] row Tensor row to push to
  /// \return Status The error code returned
  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row);

  /// \brief Load a tensor row according to a json file
  /// \param[in] file Json file location
  /// \param[inout] row Json content stored in a tensor row
  /// \return Status The error code returned
  Status LoadTensorRow(const std::string &file, TensorRow *row);

  /// \brief Load a number of tensor rows into one DataBuffer
  /// \param[in] keys Keys in the ioblock
  /// \param[inout] db Databuffer to push to
  /// \return Status The error code returned
  Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db);

  /// \brief Called first when the functor is entered
  /// \return Status The error code returned
  Status LaunchThreadsAndInitOp();

  /// \brief Reset the Op
  /// \return Status The error code returned
  Status Reset() override;

  // Private function for computing the assignment of the column name map.
  // @return - Status
  Status ComputeColMap() override;

  int32_t rows_per_buffer_;
  std::string folder_path_;  // directory of the image folder
  bool decode_;
  std::set<std::string> extensions_;  // extensions allowed
  std::unordered_map<std::string, int32_t> col_name_map_;
  std::unique_ptr<DataSchema> data_schema_;
  std::shared_ptr<Sampler> sampler_;
  int64_t row_cnt_;
  int64_t buf_cnt_;
  int64_t sampler_ind_;
  int64_t dirname_offset_;
  WaitPost wp_;
  std::vector<std::string> image_rows_;
  QueueList<std::unique_ptr<IOBlock>> io_block_queues_;  // queues of IOBlocks
};
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
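
For orientation, a minimal sketch of driving the Builder declared in the header above. The directory, schema path, and columns are placeholders, and error handling is reduced to the Status check:

// Illustrative wiring only; paths and columns are hypothetical.
std::shared_ptr<AlbumOp> op;
Status rc = AlbumOp::Builder()
              .SetAlbumDir("/path/to/album")
              .SetSchemaFile("/path/to/album_schema.json")
              .SetColumnsToLoad({"image", "label", "id"})
              .SetDecode(true)
              .Build(&op);  // runs SanityCheck() and falls back to a SequentialSampler
if (rc.IsError()) {
  // Invalid directory, missing schema, or bad worker count.
}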

@@ -134,7 +134,6 @@ Status ImageFolderOp::operator()() {
      TensorRow sample_row;
      RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
      std::shared_ptr<Tensor> sample_ids = sample_row[0];
      if (sample_ids->type() != DataType(DataType::DE_INT64)) RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64");
      for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
        if ((*itr) >= num_rows_) continue;  // index out of bound, skipping
        keys.push_back(*itr);

@@ -30,6 +30,7 @@
#include "minddata/dataset/engine/datasetops/repeat_op.h"
#include "minddata/dataset/engine/datasetops/skip_op.h"
#include "minddata/dataset/engine/datasetops/shuffle_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"

@@ -199,6 +200,11 @@ Status NodePass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified)
  return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}

Status NodePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
  // Fallback to base class visitor by default
  return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}

Status NodePass::RunOnNode(std::shared_ptr<CacheOp> node, bool *modified) {
  // Fallback to base class visitor by default
  return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);

@@ -49,6 +49,8 @@ class FilterOp;
class GeneratorOp;
#endif

class AlbumOp;

class RandomDataOp;

class RepeatOp;

@@ -178,6 +180,8 @@ class NodePass : public Pass {

  virtual Status RunOnNode(std::shared_ptr<RandomDataOp> node, bool *modified);

  virtual Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified);

  virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified);

  virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified);
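
These two declarations are the whole integration surface for the visitor: AlbumOp::Accept double-dispatches to RunOnNode, and the default implementation falls back to the DatasetOp overload. A sketch of a hypothetical pass (not part of this commit) that overrides only the AlbumOp hook:

// Illustrative only; a custom pass that merely observes AlbumOp nodes.
class CountAlbumPass : public NodePass {
 public:
  Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override {
    *modified = false;
    ++album_count_;  // observe the node without rewriting the tree
    return Status::OK();
  }

 private:
  int32_t album_count_ = 0;
};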

@@ -21,6 +21,7 @@
#include "minddata/dataset/engine/datasetops/cache_lookup_op.h"
#include "minddata/dataset/engine/datasetops/cache_merge_op.h"
#include "minddata/dataset/engine/datasetops/cache_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"

@@ -152,6 +153,11 @@ Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<ImageFolderOp> n
  return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}

// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
  return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}

// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<MnistOp> node, bool *modified) {
  return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));

@@ -79,6 +79,12 @@ class CacheTransformPass : public TreePass {
  /// \return Status The error code returned
  Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;

  /// \brief Perform leaf node cache transform identification
  /// \param[in] node The node being visited
  /// \param[inout] modified Indicator of whether the node was changed at all
  /// \return Status The error code returned
  Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;

  /// \brief Perform leaf node cache transform identification
  /// \param[in] node The node being visited
  /// \param[inout] modified Indicator of whether the node was changed at all

@@ -111,5 +111,11 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modifie
  std::cout << "Visiting ImageFolderOp" << '\n';
  return Status::OK();
}

Status PrinterPass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
  *modified = false;
  std::cout << "Visiting AlbumOp" << '\n';
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore

@@ -58,6 +58,8 @@ class PrinterPass : public NodePass {
  Status RunOnNode(std::shared_ptr<DeviceQueueOp> node, bool *modified) override;

  Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;

  Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;
};

}  // namespace dataset

@@ -48,6 +48,7 @@ class TensorOperation;
class SchemaObj;
class SamplerObj;
// Dataset classes (in alphabetical order)
class AlbumDataset;
class CelebADataset;
class Cifar10Dataset;
class Cifar100Dataset;

@@ -79,13 +80,27 @@ class ZipDataset;
/// \return Shared pointer to the current schema
std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");

/// \brief Function to create an AlbumDataset
/// \notes The generated dataset is specified through setting a schema
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] data_schema Path to the dataset schema file
/// \param[in] column_names Column names used to specify the columns to load; if empty, all columns will be read
///     (default = {})
/// \param[in] decode The option to decode the images in the dataset (default = false)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = nullptr)
/// \return Shared pointer to the current Dataset
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
                                    const std::vector<std::string> &column_names = {}, bool decode = false,
                                    const std::shared_ptr<SamplerObj> &sampler = nullptr);

/// \brief Function to create a CelebADataset
/// \notes The generated dataset has two columns ['image', 'attr'].
///     The type of the image tensor is uint8. The attr tensor is uint32 and of one-hot type.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
/// \return Shared pointer to the current Dataset

@@ -97,7 +112,7 @@ std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std:
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
                                        const std::shared_ptr<SamplerObj> &sampler = nullptr);

@@ -106,7 +121,7 @@ std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
/// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
                                          const std::shared_ptr<SamplerObj> &sampler = nullptr);

@@ -114,19 +129,19 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
/// \brief Function to create a CLUEDataset
/// \notes The generated dataset has a variable number of columns depending on the task and usage
/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list
///     will be sorted in a lexicographical order.
/// \param[in] task The kind of task, one of "AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC" and "CSL" (default="AFQMC").
/// \param[in] usage Specifies whether to read "train", "test" or "eval" data (default="train").
/// \param[in] num_samples The number of samples to be included in the dataset.
///     (Default = 0 means all samples.)
/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal)
///     Can be any of:
///     ShuffleMode::kFalse - No shuffling is performed.
///     ShuffleMode::kFiles - Shuffle files only.
///     ShuffleMode::kGlobal - Shuffle both the files and samples.
/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1)
/// \param[in] shard_id The shard ID within num_shards. This argument should be
///     specified only when num_shards is also specified. (Default = 0)
/// \return Shared pointer to the current CLUEDataset
std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files, const std::string &task = "AFQMC",
                                  const std::string &usage = "train", int64_t num_samples = 0,

@@ -135,19 +150,19 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,

/// \brief Function to create a CocoDataset
/// \notes The generated dataset has multiple columns:
///     - task='Detection', columns: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
///       ['iscrowd', dtype=uint32]].
///     - task='Stuff', columns: [['image', dtype=uint8], ['segmentation', dtype=float32], ['iscrowd', dtype=uint32]].
///     - task='Keypoint', columns: [['image', dtype=uint8], ['keypoints', dtype=float32],
///       ['num_keypoints', dtype=uint32]].
///     - task='Panoptic', columns: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
///       ['iscrowd', dtype=uint32], ['area', dtype=uint32]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] annotation_file Path to the annotation json
/// \param[in] task Set the task type of reading coco data; 'Detection'/'Stuff'/'Panoptic'/'Keypoint' are now supported
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
                                  const std::string &task = "Detection", const bool &decode = false,

@@ -181,12 +196,12 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c

/// \brief Function to create an ImageFolderDataset
/// \notes A source dataset that reads images from a tree of directories.
///     All images within one folder have the same label.
///     The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing A class name to label map
/// \return Shared pointer to the current ImageFolderDataset

@@ -200,9 +215,9 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
/// \param[in] dataset_file The dataset file to be read
/// \param[in] usage Need "train", "eval" or "inference" data (default="train")
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}; the folder
///     names will be sorted alphabetically and each class will be given a unique index starting from 0).
/// \param[in] decode Decode the images after reading (default=false).
/// \return Shared pointer to the current ManifestDataset
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train",

@@ -214,7 +229,7 @@ std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     a `RandomSampler` will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir,
                                    const std::shared_ptr<SamplerObj> &sampler = nullptr);

@@ -245,17 +260,17 @@ std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schem
/// \brief Function to create a TextFileDataset
/// \notes The generated dataset has one column ['text']
/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list
///     will be sorted in lexicographical order.
/// \param[in] num_samples The number of samples to be included in the dataset.
///     (Default = 0 means all samples.)
/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal)
///     Can be any of:
///     ShuffleMode::kFalse - No shuffling is performed.
///     ShuffleMode::kFiles - Shuffle files only.
///     ShuffleMode::kGlobal - Shuffle both the files and samples.
/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1)
/// \param[in] shard_id The shard ID within num_shards. This argument should be
///     specified only when num_shards is also specified. (Default = 0)
/// \return Shared pointer to the current TextFileDataset
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0,
                                          ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1,
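
// Illustrative usage sketch (not part of the original change; file paths are assumptions):
//   std::shared_ptr<Dataset> ds = TextFile({"/path/a.txt", "/path/b.txt"}, 0, ShuffleMode::kFalse);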

@@ -263,16 +278,16 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase
/// \brief Function to create a VOCDataset
/// \notes The generated dataset has multiple columns:
///     - task='Detection', columns: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32],
///       ['difficult', dtype=uint32], ['truncate', dtype=uint32]].
///     - task='Segmentation', columns: [['image', dtype=uint8], ['target', dtype=uint8]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] task Set the task type of reading VOC data; currently only "Segmentation" and "Detection" are supported
/// \param[in] mode Set the data list txt file to be read
/// \param[in] class_indexing A str-to-int mapping from label name to index
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
                                const std::string &mode = "train",
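
// Illustrative usage sketch (not part of the original change; the VOC root path is an assumption):
//   std::shared_ptr<Dataset> ds = VOC("/path/to/VOCdevkit/VOC2012", "Detection", "train");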

@@ -335,9 +350,9 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \notes Combines batch_size number of consecutive rows into batches
/// \param[in] batch_size The number of rows each batch is created with
/// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
///     batch. If true, and if there are less than batch_size rows
///     available to make the last batch, then those rows will
///     be dropped and not propagated to the next node
/// \return Shared pointer to the current BatchDataset
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
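
// Illustrative usage sketch (not part of the original change; assumes `ds` is an existing Dataset):
//   ds = ds->Batch(32, true);  // drop the last incomplete batch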

@@ -368,16 +383,16 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \brief Function to create a MapDataset
/// \notes Applies each operation in operations to this dataset
/// \param[in] operations Vector of operations to be applied on the dataset. Operations are
///     applied in the order they appear in this list
/// \param[in] input_columns Vector of the names of the columns that will be passed to the first
///     operation as input. The size of this list must match the number of
///     input columns expected by the first operator. The default input_columns
///     is the first column
/// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation.
///     This parameter is mandatory if len(input_columns) != len(output_columns).
///     The size of this list must match the number of output columns of the
///     last operation. The default output_columns will have the same
///     name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \return Shared pointer to the current MapDataset
std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
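
// Illustrative usage sketch (not part of the original change; assumes `one_hot_op` is a
// std::shared_ptr<TensorOperation> created elsewhere):
//   ds = ds->Map({one_hot_op}, {"label"}, {"label"});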

@@ -404,7 +419,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] count Number of times the dataset should be repeated
/// \return Shared pointer to the current Dataset
/// \note Repeat will return a shared pointer to `Dataset` instead of `RepeatDataset`
///     due to a limitation in the current implementation
std::shared_ptr<Dataset> Repeat(int32_t count = -1);
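
// Illustrative usage sketch (not part of the original change; assumes `ds` is an existing Dataset):
//   ds = ds->Repeat(2);  // each epoch produces the data twice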

/// \brief Function to create a Shuffle Dataset

@@ -506,6 +521,31 @@ class SchemaObj {
// DERIVED DATASET CLASSES FOR LEAF-NODE DATASETS
// (In alphabetical order)

class AlbumDataset : public Dataset {
 public:
  /// \brief Constructor
  AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
               const std::vector<std::string> &column_names, bool decode, const std::shared_ptr<SamplerObj> &sampler);

  /// \brief Destructor
  ~AlbumDataset() = default;

  /// \brief A base class override function to create a runtime dataset op object from this class
  /// \return Vector of shared pointers to the newly created DatasetOp objects
  std::vector<std::shared_ptr<DatasetOp>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;
  std::string schema_path_;
  std::vector<std::string> column_names_;
  bool decode_;
  std::shared_ptr<SamplerObj> sampler_;
};

class CelebADataset : public Dataset {
 public:
  /// \brief Constructor

@@ -5,6 +5,7 @@ SET(DE_UT_SRCS
        common/cvop_common.cc
        common/bboxop_common.cc
        auto_contrast_op_test.cc
        album_op_test.cc
        batch_op_test.cc
        bit_functions_test.cc
        storage_container_test.cc

@@ -101,6 +102,7 @@ SET(DE_UT_SRCS
        c_api_samplers_test.cc
        c_api_transforms_test.cc
        c_api_dataset_ops_test.cc
        c_api_dataset_album_test.cc
        c_api_dataset_cifar_test.cc
        c_api_dataset_clue_test.cc
        c_api_dataset_coco_test.cc

@@ -0,0 +1,208 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/transforms.h"

using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;

// Forward declarations of test helpers defined elsewhere in the test suite
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);

std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);

std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);

// Helper: build an AlbumOp that infers its schema from the album directory
std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, std::string path,
                               bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
                               bool decode = false) {
  std::shared_ptr<AlbumOp> so;
  AlbumOp::Builder builder;
  Status rc = builder.SetNumWorkers(num_works)
                .SetAlbumDir(path)
                .SetRowsPerBuffer(rows)
                .SetOpConnectorSize(conns)
                .SetExtensions({".json"})
                .SetSampler(std::move(sampler))
                .SetDecode(decode)
                .Build(&so);
  // NOTE: rc is not checked here; a build failure surfaces when the tree is launched
  return so;
}

// Helper: build an AlbumOp from an explicit schema file and column list
std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t conns, std::string path,
                                     std::string schema_file, std::vector<std::string> column_names = {},
                                     bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
                                     bool decode = false) {
  std::shared_ptr<AlbumOp> so;
  AlbumOp::Builder builder;
  Status rc = builder.SetNumWorkers(num_works)
                .SetSchemaFile(schema_file)
                .SetColumnsToLoad(column_names)
                .SetAlbumDir(path)
                .SetRowsPerBuffer(rows)
                .SetOpConnectorSize(conns)
                .SetExtensions({".json"})
                .SetSampler(std::move(sampler))
                .SetDecode(decode)
                .Build(&so);
  return so;
}

class MindDataTestAlbum : public UT::DatasetOpTesting {
 protected:
};

TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file, column_names, false), Repeat(2)});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " label: " << label
                    << " label shape: " << tensor_map["label"] << "\n";
      i++;
      di.GetNextAsMap(&tensor_map);
    }
    MS_LOG(INFO) << "got rows: " << i << "\n";
    // 7 samples repeated twice
    EXPECT_EQ(i, 14);
  }
}

TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " label: " << label
                    << " label shape: " << tensor_map["label"] << "\n";
      i++;
      di.GetNextAsMap(&tensor_map);
    }
    MS_LOG(INFO) << "got rows: " << i << "\n";
    EXPECT_EQ(i, 14);
  }
}

TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // this schema adds the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    double priority = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      tensor_map["_priority"]->GetItemAt<double>(&priority, {});
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " label: " << label
                    << " label shape: " << tensor_map["label"] << " priority: " << priority << "\n";
      i++;
      di.GetNextAsMap(&tensor_map);
    }
    MS_LOG(INFO) << "got rows: " << i << "\n";
    EXPECT_EQ(i, 14);
  }
}

TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // this schema adds the priority and embedding columns
  std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    double priority = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      tensor_map["_priority"]->GetItemAt<double>(&priority, {});
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " label: " << label
                    << " label shape: " << tensor_map["label"] << " priority: " << priority
                    << " embedding: " << tensor_map["_embedding"]->shape() << "\n";
      i++;
      di.GetNextAsMap(&tensor_map);
    }
    MS_LOG(INFO) << "got rows: " << i << "\n";
    EXPECT_EQ(i, 14);
  }
}

@@ -0,0 +1,136 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"

using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;

class MindDataTestPipeline : public UT::DatasetOpTesting {
 protected:
};

TEST_F(MindDataTestPipeline, TestAlbumBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumBasic.";

  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Create an Album Dataset
  std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestAlbumDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumDecode.";
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Create an Album Dataset with decode enabled
  std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto shape = image->shape();
    MS_LOG(INFO) << "Tensor image shape size: " << shape.Size();
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    EXPECT_GT(shape.Size(), 1);  // Verify decode=true took effect
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestAlbumNumSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumNumSamplers.";

  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Create an Album Dataset that takes only the first sample
  std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestAlbumError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumError.";
  // Point to a directory that does not exist
  std::string folder_path = datasets_root_path_ + "/testAlbum/ima";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Create an Album Dataset; creation is expected to fail
  std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));

  EXPECT_EQ(ds, nullptr);
}

@@ -32,6 +32,8 @@ export GLOG_v=2

## prepare data for dataset & mindrecord
cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/
## prepare album dataset; the JSON files use absolute paths, so they have to be generated at test time
python ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/data/dataset/testAlbum/gen_json.py

if [ $# -gt 0 ]; then
    ./ut_tests --gtest_filter=$1

@@ -0,0 +1 @@
just some random stuff

@@ -0,0 +1,16 @@
{
  "columns": {
    "image": {
      "type": "uint8",
      "rank": 1
    },
    "label" : {
      "type": "string",
      "rank": 1
    },
    "id" : {
      "type": "int64",
      "rank": 0
    }
  }
}

@@ -5,7 +5,7 @@
      "rank": 1
    },
    "label" : {
      "type": "int32",
      "type": "string",
      "rank": 1
    },
    "id" : {

@@ -5,7 +5,7 @@
      "rank": 1
    },
    "label" : {
      "type": "int32",
      "type": "string",
      "rank": 1
    },
    "id" : {

@@ -2,21 +2,21 @@ import json
import os

def dump_json_from_dict(structure, file_name):
    with open(file_name + '.json', 'w') as file_path:
        json.dump(structure, file_path)
    with open(file_name + '.json', 'w') as fp:
        json.dump(structure, fp)

if __name__ == '__main__':
    # iterate over directory
    DIRECTORY = "imagefolder"
    i = 0
    # iterate over DIRECTORY
    DIRECTORY = os.path.dirname(os.path.realpath(__file__)) + "/original"
    PARENT_DIR = os.path.dirname(DIRECTORY)
    i = -1
    for filename in os.listdir(DIRECTORY):
        default_dict = {}
        default_dict.update(dataset='')
        default_dict.update(image=(os.path.join(DIRECTORY, filename)))
        default_dict.update(label=[1, 2])
        default_dict.update(image=os.path.abspath(os.path.join(DIRECTORY, filename)))
        default_dict.update(label=['3', '2'])
        default_dict.update(_priority=0.8)
        default_dict.update(_embedding='sample.bin')
        default_dict.update(_segmented_image=(os.path.join(DIRECTORY, filename)))
        default_dict.update(_processed_image=(os.path.join(DIRECTORY, filename)))
        default_dict.update(_embedding=os.path.abspath(os.path.join(PARENT_DIR, 'sample.bin')))
        default_dict.update(_processed_image=os.path.abspath(os.path.join(DIRECTORY, filename)))
        i = i + 1
        dump_json_from_dict(default_dict, 'images/'+str(i))
        dump_json_from_dict(default_dict, PARENT_DIR + '/images/'+str(i))

@@ -0,0 +1 @@
{"dataset": "", "image": "original/apple_expect_decoded.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "sample.bin", "_processed_image": "original/apple_expect_decoded.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_resize_bilinear.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_resize_bilinear.jpg", "_processed_image": "imagefolder/apple_expect_resize_bilinear.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_changemode.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_changemode.jpg", "_processed_image": "imagefolder/apple_expect_changemode.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_not_flip.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_not_flip.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_not_flip.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_not_flip.jpg", "_processed_image": "imagefolder/apple_expect_not_flip.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_flipped_horizontal.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_flipped_horizontal.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_flipped_horizontal.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_horizontal.jpg", "_processed_image": "imagefolder/apple_expect_flipped_horizontal.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_rescaled.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_rescaled.jpg"}

@@ -1 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_rescaled.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_rescaled.jpg", "_processed_image": "imagefolder/apple_expect_rescaled.jpg"}
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_flipped_vertical.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_flipped_vertical.jpg"}

@@ -1 +0,0 @@
{"dataset": "", "image": "imagefolder/apple_expect_flipped_vertical.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_vertical.jpg", "_processed_image": "imagefolder/apple_expect_flipped_vertical.jpg"}

(Binary image diffs: 7 test images carried over with unchanged sizes, plus 14 images added under the new testAlbum paths.)