load_ckpt debug

Payne 2020-09-15 22:09:03 +08:00
parent e39775edfd
commit 93c4d2929c
4 changed files with 26 additions and 19 deletions

View File

@@ -46,6 +46,10 @@ if __name__ == '__main__':
     dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, config=config)
     step_size = dataset.get_dataset_size()
+    if step_size == 0:
+        raise ValueError("The step_size of dataset is zero. Check if the images count of train dataset is more \
+            than batch_size in config.py")
 
     net.set_train(False)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
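
The new guard makes the failure mode explicit. As a minimal sketch (not part of this commit, and assuming create_dataset batches with drop_remainder=True, as the error message implies), get_dataset_size() returns 0 whenever the dataset holds fewer samples than one batch:

import numpy as np
import mindspore.dataset as ds

# Toy dataset with 10 samples but a batch size of 32; with drop_remainder=True
# no full batch can be formed, so get_dataset_size() reports 0 steps.
data = {"image": np.zeros((10, 8), np.float32), "label": np.zeros(10, np.int32)}
dataset = ds.NumpySlicesDataset(data, shuffle=False).batch(32, drop_remainder=True)

step_size = dataset.get_dataset_size()   # 0 -> the added check raises ValueError
if step_size == 0:
    raise ValueError("The step_size of dataset is zero. Check if the images count of train "
                     "dataset is more than batch_size in config.py")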

View File

@@ -16,7 +16,6 @@
 create train or eval dataset.
 """
 import os
-from tqdm import tqdm
 import numpy as np
 from mindspore import Tensor
@@ -109,19 +108,20 @@ def extract_features(net, dataset_path, config):
                              config=config,
                              repeat_num=1)
     step_size = dataset.get_dataset_size()
-    pbar = tqdm(list(dataset.create_dict_iterator()))
+    if step_size == 0:
+        raise ValueError("The step_size of dataset is zero. Check if the images count of train dataset is more \
+            than batch_size in config.py")
     model = Model(net)
-    i = 0
-    for data in pbar:
+    for i, data in enumerate(dataset.create_dict_iterator()):
         features_path = os.path.join(features_folder, f"feature_{i}.npy")
         label_path = os.path.join(features_folder, f"label_{i}.npy")
-        if not (os.path.exists(features_path) and os.path.exists(label_path)):
+        if not os.path.exists(features_path) or not os.path.exists(label_path):
             image = data["image"]
             label = data["label"]
             features = model.predict(Tensor(image))
             np.save(features_path, features.asnumpy())
             np.save(label_path, label)
-        pbar.set_description("Process dataset batch: %d" % (i + 1))
-        i += 1
+        print(f"Complete the batch {i}/{step_size}")
     return step_size
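
The cached batches are written as feature_{i}.npy / label_{i}.npy pairs so repeated incremental-learning runs can skip the backbone forward pass. A hedged sketch of reading them back (load_feature_batches is a hypothetical helper, not code from this repository; the actual consumer lives in train.py):

import os
import numpy as np
from mindspore import Tensor

def load_feature_batches(features_folder, step_size):
    """Yield (feature, label) Tensor pairs saved by extract_features."""
    for i in range(step_size):
        feature = np.load(os.path.join(features_folder, f"feature_{i}.npy"))
        label = np.load(os.path.join(features_folder, f"label_{i}.npy"))
        yield Tensor(feature), Tensor(label)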

View File

@@ -330,8 +330,12 @@ class MobileNetV2(nn.Cell):
     MobileNetV2 architecture.
 
     Args:
-        backbone(nn.Cell):
-        head(nn.Cell):
+        class_num (int): number of classes.
+        width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1.
+        has_dropout (bool): Is dropout used. Default is false
+        inverted_residual_setting (list): Inverted residual settings. Default is None
+        round_nearest (list): Channel round to . Default is 8
 
     Returns:
         Tensor, output tensor.
@@ -355,14 +359,11 @@ class MobileNetV2(nn.Cell):
 class MobileNetV2Combine(nn.Cell):
     """
-    MobileNetV2 architecture.
+    MobileNetV2Combine architecture.
 
     Args:
-        class_num (Cell): number of classes.
-        width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1.
-        has_dropout (bool): Is dropout used. Default is false
-        inverted_residual_setting (list): Inverted residual settings. Default is None
-        round_nearest (list): Channel round to . Default is 8
+        backbone(Cell): The features extract layers.
+        head(Cell): The fully connected layer.
 
     Returns:
         Tensor, output tensor.
@@ -371,7 +372,7 @@ class MobileNetV2Combine(nn.Cell):
     """
 
     def __init__(self, backbone, head):
-        super(MobileNetV2Combine, self).__init__()
+        super(MobileNetV2Combine, self).__init__(auto_prefix=False)
         self.backbone = backbone
         self.head = head
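
The corrected docstrings and the auto_prefix=False change go together: with auto_prefix disabled, the wrapper does not prepend its own scope to the backbone and head parameter names, which keeps checkpoint keys stable when the two parts are saved and reloaded separately. A minimal illustrative cell in the same shape (a sketch, not the model zoo implementation):

import mindspore.nn as nn

class CombineSketch(nn.Cell):
    """Wrap a feature-extraction backbone and a fully connected head."""
    def __init__(self, backbone, head):
        # auto_prefix=False keeps the sub-cells' original parameter names
        # instead of prefixing them with this wrapper's scope.
        super(CombineSketch, self).__init__(auto_prefix=False)
        self.backbone = backbone
        self.head = head

    def construct(self, x):
        x = self.backbone(x)
        return self.head(x)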

View File

@@ -62,6 +62,9 @@ if __name__ == '__main__':
         raise ValueError("Currently, CPU only support \"incremental_learn\", not \"fine_tune\" or \"train\".")
 
     dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, config=config)
     step_size = dataset.get_dataset_size()
+    if step_size == 0:
+        raise ValueError("The step_size of dataset is zero. Check if the images count of train dataset is more \
+            than batch_size in config.py")
 
     # Currently, only Ascend support switch precision.
     switch_precision(net, mstype.float16, config)
@@ -108,9 +111,8 @@ if __name__ == '__main__':
                 losses.append(network(feature, label).asnumpy())
             epoch_mseconds = (time.time()-epoch_start) * 1000
             per_step_mseconds = epoch_mseconds / step_size
-            print("\r epoch[{}], iter[{}] cost: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}"\
-                  .format(epoch + 1, step_size, epoch_mseconds, per_step_mseconds, np.mean(np.array(losses))), \
-                  end="")
+            print("epoch[{}], iter[{}] cost: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}"\
+                  .format(epoch + 1, step_size, epoch_mseconds, per_step_mseconds, np.mean(np.array(losses))))
             if (epoch + 1) % config.save_checkpoint_epochs == 0:
                 _exec_save_checkpoint(network, os.path.join(config.save_checkpoint_path, \
                                       f"mobilenetv2_head_{epoch+1}.ckpt"))