Support large files on Windows
This commit is contained in:
parent
8bf5fadde5
commit
f0f4a0b50d
|
@ -105,7 +105,7 @@ Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vec
|
|||
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
for (auto f : dataset_files) {
|
||||
for (const auto &f : dataset_files) {
|
||||
Path dataset_file(f);
|
||||
if (!dataset_file.Exists()) {
|
||||
std::string err_msg = dataset_name + ": " + file_name + ": [" + f + "] is invalid or does not exist.";
|
||||
|
|
|
@ -174,6 +174,7 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
// Create Schema Object
|
||||
std::unique_ptr<DataSchema> data_schema = std::make_unique<DataSchema>();
|
||||
if (!schema_path_.empty()) {
|
||||
RETURN_IF_NOT_OK(ValidateDatasetFilesParam("TFRecordDataset", {schema_path_}));
|
||||
RETURN_IF_NOT_OK(data_schema->LoadSchemaFile(schema_path_, columns_list_));
|
||||
} else if (schema_obj_ != nullptr) {
|
||||
std::string schema_json_string = schema_obj_->to_json();
|
||||
|
|
|
@ -5535,15 +5535,7 @@ TFRecord(const std::vector<std::string> &dataset_files, const T &schema = nullpt
|
|||
VectorStringToChar(columns_list), num_samples, shuffle, num_shards, shard_id,
|
||||
shard_equal_rows, cache, StringToChar(compression_type));
|
||||
} else {
|
||||
std::string schema_path = schema;
|
||||
if (!schema_path.empty()) {
|
||||
struct stat sb {};
|
||||
int rc = stat(schema_path.c_str(), &sb);
|
||||
if (rc != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
ds = std::make_shared<TFRecordDataset>(VectorStringToChar(dataset_files), StringToChar(schema_path),
|
||||
ds = std::make_shared<TFRecordDataset>(VectorStringToChar(dataset_files), StringToChar(schema),
|
||||
VectorStringToChar(columns_list), num_samples, shuffle, num_shards, shard_id,
|
||||
shard_equal_rows, cache, StringToChar(compression_type));
|
||||
}
|
||||
|
|
|
@ -2332,9 +2332,7 @@ Status ReadFile(const std::string &filename, std::shared_ptr<Tensor> *output) {
|
|||
if (!realpath.has_value()) {
|
||||
RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " does not exist.");
|
||||
}
|
||||
struct stat sb;
|
||||
stat(realpath.value().c_str(), &sb);
|
||||
if (S_ISREG(sb.st_mode) == 0) {
|
||||
if (!Path(realpath.value()).IsFile()) {
|
||||
RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " is not a regular file.");
|
||||
}
|
||||
|
||||
|
@ -2350,9 +2348,7 @@ Status ReadImage(const std::string &filename, std::shared_ptr<Tensor> *output, I
|
|||
std::string err_msg = "ReadImage: Invalid file path, " + filename + " does not exist.";
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
struct stat sb;
|
||||
stat(realpath.value().c_str(), &sb);
|
||||
if (S_ISREG(sb.st_mode) == 0) {
|
||||
if (!Path(realpath.value()).IsFile()) {
|
||||
RETURN_STATUS_UNEXPECTED("ReadImage: Invalid file path, " + filename + " is not a regular file.");
|
||||
}
|
||||
|
||||
|
@ -2416,9 +2412,7 @@ Status WriteFile(const std::string &filename, const std::shared_ptr<Tensor> &dat
|
|||
if (!realpath.has_value()) {
|
||||
RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " failed to get the real path.");
|
||||
}
|
||||
struct stat sb;
|
||||
stat(realpath.value().c_str(), &sb);
|
||||
if (S_ISREG(sb.st_mode) == 0) {
|
||||
if (!Path(realpath.value()).IsFile()) {
|
||||
RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " is not a regular file.");
|
||||
}
|
||||
|
||||
|
@ -2501,9 +2495,7 @@ Status WriteJpeg(const std::string &filename, const std::shared_ptr<Tensor> &ima
|
|||
if (!realpath.has_value()) {
|
||||
RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " failed to get the real path.");
|
||||
}
|
||||
struct stat sb;
|
||||
stat(realpath.value().c_str(), &sb);
|
||||
if (S_ISREG(sb.st_mode) == 0) {
|
||||
if (!Path(realpath.value()).IsFile()) {
|
||||
RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " is not a regular file.");
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
#include <sstream>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <direct.h> // for _mkdir
|
||||
#include <direct.h> // for _mkdir
|
||||
#define stat _stat64 // for file size exceeds (1<<31)-1 bytes
|
||||
#endif
|
||||
|
||||
#include "./securec.h"
|
||||
|
@ -57,7 +58,7 @@ Path::Path(const char *p) {
|
|||
#endif
|
||||
}
|
||||
|
||||
Path::Path(const Path &p) : path_(p.path_) {}
|
||||
Path::Path(const Path &p) = default;
|
||||
|
||||
Path &Path::operator=(const Path &p) {
|
||||
if (&p != this) {
|
||||
|
@ -138,7 +139,7 @@ std::string Path::Extension() const {
|
|||
}
|
||||
|
||||
bool Path::Exists() {
|
||||
struct stat sb;
|
||||
struct stat sb {};
|
||||
int rc = stat(common::SafeCStr(path_), &sb);
|
||||
if (rc == -1 && errno != ENOENT) {
|
||||
MS_LOG(WARNING) << "Unable to query the status of " << path_ << ". Errno = " << errno << ".";
|
||||
|
@ -147,7 +148,7 @@ bool Path::Exists() {
|
|||
}
|
||||
|
||||
bool Path::IsDirectory() {
|
||||
struct stat sb;
|
||||
struct stat sb {};
|
||||
int rc = stat(common::SafeCStr(path_), &sb);
|
||||
if (rc == 0) {
|
||||
return S_ISDIR(sb.st_mode);
|
||||
|
@ -156,6 +157,16 @@ bool Path::IsDirectory() {
|
|||
}
|
||||
}
|
||||
|
||||
bool Path::IsFile() {
|
||||
struct stat sb {};
|
||||
int rc = stat(common::SafeCStr(path_), &sb);
|
||||
if (rc == 0) {
|
||||
return S_ISREG(sb.st_mode);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Status Path::CreateDirectory(bool is_common_dir) {
|
||||
if (!Exists()) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
@ -188,7 +199,7 @@ Status Path::CreateDirectory(bool is_common_dir) {
|
|||
}
|
||||
|
||||
std::string Path::ParentPath() {
|
||||
std::string r("");
|
||||
std::string r;
|
||||
std::size_t found = path_.find_last_of(separator_);
|
||||
if (found != std::string::npos) {
|
||||
if (found == 0) {
|
||||
|
|
|
@ -93,6 +93,8 @@ class Path {
|
|||
|
||||
bool IsDirectory();
|
||||
|
||||
bool IsFile();
|
||||
|
||||
Status CreateDirectory(bool is_common_dir = false);
|
||||
|
||||
Status CreateDirectories(bool is_common_dir = false);
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#include "utils/ms_utils.h"
|
||||
#include "./securec.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define stat _stat64 // for file size exceeds (1<<31)-1 bytes
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace mindrecord {
|
||||
// split a string using a character
|
||||
|
|
|
@ -489,7 +489,7 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetExeception) {
|
|||
EXPECT_EQ(ds2->CreateIterator(), nullptr);
|
||||
|
||||
// This case is expected to fail because the schema file does not exist.
|
||||
std::shared_ptr<Dataset> ds4 = TFRecord({file_path, "notexist.json"});
|
||||
std::shared_ptr<Dataset> ds4 = TFRecord({file_path}, "notexist.json");
|
||||
EXPECT_EQ(ds4->CreateIterator(), nullptr);
|
||||
|
||||
// This case expected to fail because num_samples is negative.
|
||||
|
|
Loading…
Reference in New Issue