forked from mindspore-Ecosystem/mindspore
!8703 GPU fix padding and dataset_helper
From: @VectorSL
commit afde772128
@@ -76,7 +76,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded_addr = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), input_addr, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
@@ -133,23 +133,18 @@ class Conv2dGpuFwdKernel : public GpuKernel {
                    [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[2], stride_[3]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
       pad_left_ = pad_list[2];
 
-      // if use_pad_ == true, using zero padding in advance, else using the default cudnn pad.
-      if (pad_height_ % 2 == 0 && pad_width_ % 2 == 0) {
-        use_pad_ = false;
-      }
       int dimA[4];
       int strideApadded[4];
       if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) {
@@ -165,18 +160,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded),
                                   "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
+      input_descriptor_real = padded_desc_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -232,7 +221,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     input_size_list_.push_back(input_size_);
     input_size_list_.push_back(filter_size_);
     output_size_list_.push_back(output_size_);
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle_, padded_desc_, filter_desc_, conv_desc_, output_desc_,
                                                 conv_algorithm_, &workspace_size_),
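The net effect of this change (the two gradient kernels below get the identical treatment) is that `use_pad_` now simply means "the requested padding is asymmetric". Symmetric padding is handed to cuDNN through `padA`; asymmetric padding is materialized up front by zero-padding the input into a workspace buffer, after which cuDNN is told the tensor needs no further padding. The old code used evenness of the total pad (`pad_height_ % 2 == 0 && pad_width_ % 2 == 0`) as a proxy for symmetry, which misclassifies pads such as `{top, bottom} = {0, 2}`: the total is even but the pad is one-sided. A minimal sketch of the new decision logic, in Python for brevity (illustrative names, not MindSpore source):

```python
# Sketch of the padding decision after this commit (illustrative only).
# pad_list follows the kernel code above: [top, bottom, left, right].
def decide_pad(pad_list):
    top, bottom, left, right = pad_list
    # Mirrors: use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]))
    use_pad = not (top == bottom and left == right)
    if use_pad:
        # Asymmetric: zero-pad into a workspace buffer first (CalPad/CalPadNHWC),
        # then describe the tensor to cuDNN as already padded.
        pad_a = (0, 0)
        extra_hw = (top + bottom, left + right)  # growth of the padded buffer
    else:
        # Symmetric: let cuDNN apply the padding itself.
        pad_a = (top, left)
        extra_hw = (0, 0)
    return use_pad, pad_a, extra_hw

# The case the old evenness test mishandled: even totals, one-sided pad.
print(decide_pad([0, 2, 0, 2]))  # (True, (0, 0), (2, 2)) -> pad manually
print(decide_pad([1, 1, 1, 1]))  # (False, (1, 1), (0, 0)) -> cuDNN pads
```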
@@ -78,7 +78,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
 
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
@@ -136,14 +136,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                    [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t x_desc_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[0], stride_[1]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
@@ -167,17 +167,12 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
         "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
+      x_desc_real = padded_descriptor_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -231,7 +226,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     input_size_list_.push_back(input_size_);
     output_size_list_.push_back(output_size_);
 
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnGetTensorSizeInBytes(padded_descriptor_, reinterpret_cast<size_t *>(&padded_size_)),
         "cudnnGetTensorSizeInBytes failed");
@@ -77,7 +77,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     }
 
     const float alpha = 1;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded = GetDeviceAddress<T>(workspace, 1);
 
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -139,14 +139,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                    [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t dx_desc_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[0], stride_[1]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
@@ -170,17 +170,12 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
         "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
+      dx_desc_real = padded_descriptor_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -233,7 +228,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     input_size_list_.push_back(w_size_);
     output_size_list_.push_back(output_size_);
 
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(padded_descriptor_, &padded_size_),
                                   "cudnnGetTensorSizeInBytes failed");
@@ -662,8 +662,9 @@ void Pipeline::Run() {
         auto manager = func_graph->manager();
         size_t graph_nums = manager->func_graphs().size();
         if (graph_nums == 1) {
-          resource_->set_gpu_loopsink_flag(true);
-          MS_LOG(INFO) << "Change gpu_loopsink_flag_ to true.";
+          int64_t sinksize = ConfigManager::GetInstance().iter_num();
+          resource_->set_gpu_loopsink(true, sinksize);
+          MS_LOG(INFO) << "Change gpu_loopsink_flag_ to true, set loopsink size to " << sinksize;
         }
       }
     }
@@ -834,7 +835,7 @@ py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) {
   }
   // Set loopsink size for each phase.
   bool is_loopsink = info_[phase_s]->resource->gpu_loopsink_flag();
-  int64_t sinksize = ConfigManager::GetInstance().iter_num();
+  int64_t sinksize = info_[phase_s]->resource->gpu_loopsink_size();
   ConfigManager::GetInstance().set_gpu_loopsink_size(is_loopsink ? sinksize : 1);
   // If target is not gpu or is loopsink, keep vmloop 1.
   bool g = (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
@@ -74,8 +74,12 @@ class Resource : public ResourceBase {
   const abstract::AbstractBasePtrList &args_spec() const { return args_spec_; }
   void set_args_spec(const abstract::AbstractBasePtrList &args_spec) { args_spec_ = args_spec; }
 
-  void set_gpu_loopsink_flag(const bool &flag) { gpu_loopsink_flag_ = flag; }
+  void set_gpu_loopsink(const bool &flag, const int64_t size) {
+    gpu_loopsink_flag_ = flag;
+    gpu_loopsink_size_ = size;
+  }
   bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
+  int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }
 
   // Reclaim resource and clear the cache.
   // ExecutorPy::Compile() can be called multiple times, so cache
@@ -89,6 +93,7 @@ class Resource : public ResourceBase {
   py::object input_;
   bool is_cleaned_;
   bool gpu_loopsink_flag_{false};
+  int64_t gpu_loopsink_size_{1};
 };
 
 using ResourcePtr = std::shared_ptr<pipeline::Resource>;
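Taken together, the Pipeline and Resource hunks change where the GPU loopsink size lives: `Pipeline::Run()` now snapshots `ConfigManager`'s `iter_num()` into the phase's `Resource` at compile time, and `ExecutorPy::Run()` reads it back from that `Resource` instead of re-reading the global, so each phase keeps the sink size it was compiled with even if a later compile changes `iter_num`. A small sketch of the idea (illustrative, not MindSpore source):

```python
# Each compiled phase captures the iter_num current at compile time.
config_iter_num = 100  # stand-in for ConfigManager iter_num()

resources = {}
resources["train"] = {"loopsink_flag": True, "loopsink_size": config_iter_num}

config_iter_num = 1    # a later compile (say, an eval phase) changes the global
resources["eval"] = {"loopsink_flag": True, "loopsink_size": config_iter_num}

# At run time each phase still uses the size it was compiled with:
assert resources["train"]["loopsink_size"] == 100
assert resources["eval"]["loopsink_size"] == 1
```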
@@ -129,6 +129,8 @@ class DatasetHelper:
         Validator.check_is_int(sink_size)
         if sink_size < -1 or sink_size == 0:
             raise ValueError("The sink_size must be -1 or positive, but got sink_size {}.".format(sink_size))
+        if sink_size == -1:
+            sink_size = dataset.get_dataset_size()
 
         if dataset_sink_mode:
            if context.get_context("enable_ge"):
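The added lines resolve the `sink_size == -1` sentinel to the concrete dataset size once, inside `DatasetHelper`, so downstream code always sees a positive step count. A standalone sketch of the resulting validation flow (hypothetical helper, assuming `dataset` exposes `get_dataset_size()` as in the diff):

```python
def normalize_sink_size(sink_size, dataset):
    """Validate sink_size and resolve the -1 sentinel (illustrative helper)."""
    if not isinstance(sink_size, int):
        raise TypeError("sink_size must be an int, but got {}.".format(type(sink_size)))
    if sink_size < -1 or sink_size == 0:
        raise ValueError("The sink_size must be -1 or positive, but got sink_size {}.".format(sink_size))
    if sink_size == -1:  # -1 means "sink the whole dataset each epoch"
        sink_size = dataset.get_dataset_size()
    return sink_size
```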
@@ -14,15 +14,14 @@
 # limitations under the License.
 # ============================================================================
 # A simple tutorial follows; more parameters can be set.
-if [ $# != 3 ]
+if [ $# != 2 ]
 then
-    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
+    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH]"
     exit 1
 fi
 
 export DATASET_NAME=$1
 export DATA_PATH=$2
-export DEVICE_ID=$3
 
 python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
-    --device_id=$DEVICE_ID --device_target="GPU" > log 2>&1 &
+    --device_target="GPU" > log 2>&1 &
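With this change the script takes two arguments, e.g. `sh run_standalone_train_gpu.sh cifar10 /path/to/cifar10`; the GPU to run on is presumably selected through `CUDA_VISIBLE_DEVICES` or the framework default rather than a positional `DEVICE_ID` argument.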
@@ -345,11 +345,11 @@ epoch: 5 step: 5004, loss is 3.3501816
 
 ```
 # ========START RESNET50 GPU BENCHMARK========
-step time: 12416.098 ms, fps: 412 img/sec. epoch: 1 step: 20, loss is 6.940182
-step time: 3472.037 ms, fps: 1474 img/sec. epoch: 2 step: 20, loss is 7.078993
-step time: 3469.523 ms, fps: 1475 img/sec. epoch: 3 step: 20, loss is 7.559594
-step time: 3460.311 ms, fps: 1479 img/sec. epoch: 4 step: 20, loss is 6.920937
-step time: 3460.543 ms, fps: 1479 img/sec. epoch: 5 step: 20, loss is 6.814013
+Epoch time: 12416.098 ms, fps: 412 img/sec. epoch: 1 step: 20, loss is 6.940182
+Epoch time: 3472.037 ms, fps: 1474 img/sec. epoch: 2 step: 20, loss is 7.078993
+Epoch time: 3469.523 ms, fps: 1475 img/sec. epoch: 3 step: 20, loss is 7.559594
+Epoch time: 3460.311 ms, fps: 1479 img/sec. epoch: 4 step: 20, loss is 6.920937
+Epoch time: 3460.543 ms, fps: 1479 img/sec. epoch: 5 step: 20, loss is 6.814013
 ...
 ```
 ## [Evaluation Process](#contents)
@@ -53,7 +53,7 @@ class MyTimeMonitor(Callback):
     def step_end(self, run_context):
         step_mseconds = (time.time() - self.step_time) * 1000
         fps = self.batch_size / step_mseconds * 1000 * self.size
-        print("step time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True, end=" ")
+        print("Epoch time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True, end=" ")
 
 def pad(image):
     zeros = np.zeros([224, 224, 1], dtype=np.uint8)
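The rename from "step time" to "Epoch time" reflects that `step_end` here fires once per sunk interval of `self.size` steps, so the measured span is a whole (20-step) benchmark epoch, and the fps formula scales by that step count. A quick check against the README log above, assuming `batch_size = 256` and `size = 20` (both consistent with the logged numbers, though not stated in this diff):

```python
batch_size = 256          # assumed; consistent with the logged fps
size = 20                 # steps executed per callback interval
step_mseconds = 3472.037  # "Epoch time" reported for epoch 2
fps = batch_size / step_mseconds * 1000 * size
print(int(fps))           # -> 1474, matching "fps: 1474 img/sec."
```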