train code check return value
This commit is contained in:
parent
fa3e26ce61
commit
67b4fe9cea
|
@ -35,7 +35,11 @@ void AccuracyMonitor::Begin(const session::TrainLoopCallBackData &cb_data) {
|
|||
|
||||
int AccuracyMonitor::EpochEnd(const session::TrainLoopCallBackData &cb_data) {
|
||||
if ((static_cast<int>(cb_data.epoch_) + 1) % check_every_n_ == 0) {
|
||||
cb_data.loop_->Eval(ds_, {}, nullptr, max_steps_);
|
||||
auto ret = cb_data.loop_->Eval(ds_, {}, nullptr, max_steps_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Eval failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
accuracies_.push_back(std::make_pair(cb_data.epoch_, 0.0));
|
||||
return mindspore::session::RET_CONTINUE;
|
||||
|
|
|
@ -117,8 +117,8 @@ class OptimizerKernel : public InnerKernel {
|
|||
}
|
||||
|
||||
// Resets the learning rate by calling SetLearningRate(default_lr_).
// NOTE(review): this span is a rendered diff hunk (-117,8 +117,8), so both versions of
// the body are shown back to back. The first SetLearningRate/return pair appears to be
// the removed code, which ignored the call's result and always returned RET_OK; the
// second pair appears to be its replacement, which returns SetLearningRate's result.
int RestoreDefaultLearningRate() {
|
||||
// Pre-change lines: result of SetLearningRate() discarded.
SetLearningRate(default_lr_);
|
||||
return RET_OK;
|
||||
// Post-change lines: the status from SetLearningRate() is handed back to the caller.
auto ret = SetLearningRate(default_lr_);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int SetOptimizerMode(WeightUpdateMode mod) {
|
||||
|
@ -139,7 +139,11 @@ class OptimizerKernel : public InnerKernel {
|
|||
weight_update_mod_ = mod;
|
||||
} else {
|
||||
if (grad_sum_ != nullptr) {
|
||||
OptimizerStep();
|
||||
auto ret = OptimizerStep();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "OptimizerStep failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ms_context_->allocator->Free(grad_sum_);
|
||||
grad_sum_ = nullptr;
|
||||
}
|
||||
|
@ -169,7 +173,11 @@ class OptimizerKernel : public InnerKernel {
|
|||
|
||||
// Override of Eval(): when the weight-update mode is not ACCUMULATE_GRADS it runs
// OptimizerStep() first, then delegates to InnerKernel::Eval().
// NOTE(review): this span is a rendered diff hunk (-169,7 +173,11). The bare
// OptimizerStep() call appears to be the removed line; the checked call that follows
// appears to be its replacement.
int Eval() override {
|
||||
if (weight_update_mod_ != WeightUpdateMode::ACCUMULATE_GRADS) {
|
||||
// Pre-change line: the result of OptimizerStep() was discarded.
OptimizerStep();
|
||||
// Post-change lines: a result other than RET_OK is logged and reported as RET_ERROR,
// and InnerKernel::Eval() is not reached.
auto ret = OptimizerStep();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "OptimizerStep failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return InnerKernel::Eval();
|
||||
}
|
||||
|
|
|
@ -167,7 +167,11 @@ std::unique_ptr<schema::TensorT> TrainExport::CreateTensor(const mindspore::lite
|
|||
tensorT->enableHuffmanCode = false;
|
||||
if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
|
||||
if (NeedQuantization(tensor)) {
|
||||
QuantTensorData(tensorT.get(), tensor, preferred_dim);
|
||||
auto ret = QuantTensorData(tensorT.get(), tensor, preferred_dim);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "QuantTensorData failed.";
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
tensorT->data = CreateData(tensor);
|
||||
}
|
||||
|
|
|
@ -498,8 +498,12 @@ int TrainSession::MixPrecisionExecKernels(const KernelCallBack &before, const Ke
|
|||
float scale = cfg_.mix_precision_cfg_.loss_scale_;
|
||||
for (auto *kernel : run_kernels) {
|
||||
MS_ASSERT(kernel != nullptr);
|
||||
MixPrecisionPreProcess(kernel, scale);
|
||||
auto ret = kernel->Execute(before, after);
|
||||
auto ret = MixPrecisionPreProcess(kernel, scale);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MixPrecisionPreProcess failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = kernel->Execute(before, after);
|
||||
if (RET_OK != ret) {
|
||||
MixPrecisionPostProcess(kernel);
|
||||
// decrease loss scale in case of nan or inf
|
||||
|
@ -758,7 +762,11 @@ void TrainSession::CompileOptimizedKernels() {
|
|||
std::copy(kernel->in_tensors().begin(), kernel->in_tensors().end(), std::back_inserter(out_tensor));
|
||||
if (cfg_.accumulate_gradients_) {
|
||||
auto optimizer = static_cast<kernel::OptimizerKernel *>(kernel->kernel());
|
||||
optimizer->SetOptimizerMode(kernel::WeightUpdateMode::ACCUMULATE_GRADS);
|
||||
auto ret = optimizer->SetOptimizerMode(kernel::WeightUpdateMode::ACCUMULATE_GRADS);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "SetOptimizerMode failed.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -832,7 +840,7 @@ int TrainSession::SetOptimizerParams(const std::vector<tensor::MSTensor *> &para
|
|||
for (size_t ix = 0; ix < params.size(); ix++) {
|
||||
auto param = params[ix];
|
||||
if (param == nullptr) {
|
||||
MS_LOG(ERROR) << "Param tensor " << param->tensor_name() << " is null.";
|
||||
MS_LOG(ERROR) << "Param tensor is null.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
bool found = false;
|
||||
|
@ -876,7 +884,7 @@ int TrainSession::ApplyGradients(const std::vector<tensor::MSTensor *> &gradient
|
|||
for (size_t ix = 0; ix < gradients.size(); ix++) {
|
||||
auto gradient = gradients[ix];
|
||||
if (gradient == nullptr) {
|
||||
MS_LOG(ERROR) << "gradient tensor " << gradient->tensor_name() << " is null.";
|
||||
MS_LOG(ERROR) << "gradient tensor is null.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
bool found = false;
|
||||
|
@ -904,7 +912,10 @@ int TrainSession::ApplyGradients(const std::vector<tensor::MSTensor *> &gradient
|
|||
for (auto kernel : this->train_kernels_) {
|
||||
if (IsOptimizer(kernel)) {
|
||||
auto optimizer = static_cast<kernel::OptimizerKernel *>(kernel->kernel());
|
||||
optimizer->set_grad_sum_valid();
|
||||
if (optimizer->set_grad_sum_valid() != RET_OK) {
|
||||
MS_LOG(ERROR) << "set grad sum valid failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = optimizer->OptimizerStep();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "failed to optimize model weights";
|
||||
|
|
|
@ -195,7 +195,10 @@ int TransferSession::Export(const std::string &filename, ModelType model_type, Q
|
|||
}
|
||||
|
||||
bool orig_train_state = IsTrain();
|
||||
Eval();
|
||||
if (Eval() != RET_OK) {
|
||||
MS_LOG(ERROR) << "eval failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
TrainExport texport(filename);
|
||||
int status = texport.LoadModel(lite_model_, size_backbone_);
|
||||
if (status != RET_OK) {
|
||||
|
@ -231,7 +234,13 @@ int TransferSession::Export(const std::string &filename, ModelType model_type, Q
|
|||
MS_LOG(ERROR) << "failed to save to " << filename;
|
||||
return status;
|
||||
}
|
||||
if (orig_train_state) Train();
|
||||
if (orig_train_state) {
|
||||
auto ret = Train();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "train failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue