Add num_epochs to non-sink training
This commit is contained in:
parent
e69d868016
commit
9cee0d2143
|
@ -63,7 +63,7 @@ class SVI:
|
|||
for _ in range(1, epochs+1):
|
||||
train_loss = 0
|
||||
dataset_size = 0
|
||||
for data in train_dataset.create_dict_iterator():
|
||||
for data in train_dataset.create_dict_iterator(num_epochs=1):
|
||||
x = Tensor(data['image'], dtype=mstype.float32)
|
||||
y = Tensor(data['label'], dtype=mstype.int32)
|
||||
dataset_size += len(x)
|
||||
|
|
|
@ -145,7 +145,7 @@ class DatasetHelper:
|
|||
self.iter = iterclass(dataset, sink_size, epoch_num)
|
||||
else:
|
||||
iterclass = _DatasetIterNormal
|
||||
self.iter = iterclass(dataset)
|
||||
self.iter = iterclass(dataset, epoch_num=epoch_num)
|
||||
|
||||
def __iter__(self):
|
||||
return self.iter.__iter__()
|
||||
|
@ -290,11 +290,12 @@ class _DatasetIterPSLite(_DatasetIter):
|
|||
|
||||
class _DatasetIterNormal:
|
||||
"""Iter for normal(non sink) mode, feed the data from host."""
|
||||
def __init__(self, dataset):
|
||||
|
||||
def __init__(self, dataset, epoch_num=-1):
|
||||
self.dataset = dataset
|
||||
self.device_num = _get_device_num()
|
||||
self.global_rank = _get_global_rank()
|
||||
self.iter = self.dataset.create_tuple_iterator()
|
||||
self.iter = self.dataset.create_tuple_iterator(num_epochs=epoch_num)
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
|
|
@ -460,7 +460,8 @@ class Model:
|
|||
is_train=True,
|
||||
phase='train',
|
||||
dataset=train_dataset,
|
||||
dataset_sink_mode=False)
|
||||
dataset_sink_mode=False,
|
||||
epoch_num=epoch)
|
||||
cb_params.cur_step_num = 0
|
||||
run_context = RunContext(cb_params)
|
||||
list_callback.begin(run_context)
|
||||
|
|
|
@ -57,7 +57,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):
|
|||
print("total images num: ", total)
|
||||
print("Processing, please wait a moment.")
|
||||
max_num = 128
|
||||
for data in ds.create_dict_iterator():
|
||||
for data in ds.create_dict_iterator(num_epochs=1):
|
||||
eval_iter = eval_iter + 1
|
||||
|
||||
img_data = data['image']
|
||||
|
|
|
@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file):
|
|||
print("total images num: ", total)
|
||||
print("Processing, please wait a moment.")
|
||||
max_num = 128
|
||||
for data in ds.create_dict_iterator(output_numpy=True):
|
||||
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
eval_iter = eval_iter + 1
|
||||
|
||||
img_data = data['image']
|
||||
|
|
|
@ -200,7 +200,7 @@ def test(cloud_args=None):
|
|||
per_batch_size=args.per_batch_size,
|
||||
max_epoch=1, rank=args.rank, group_size=args.group_size,
|
||||
mode='eval')
|
||||
eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
|
||||
eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
|
||||
network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
|
||||
if network is None:
|
||||
raise NotImplementedError('not implement {}'.format(args.backbone))
|
||||
|
|
|
@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path):
|
|||
print("\n========================================\n")
|
||||
print("total images num: ", total)
|
||||
print("Processing, please wait a moment.")
|
||||
for data in ds.create_dict_iterator(output_numpy=True):
|
||||
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
img_id = data['img_id']
|
||||
img_np = data['image']
|
||||
image_shape = data['image_shape']
|
||||
|
|
|
@ -159,7 +159,7 @@ def test(cloud_args=None):
|
|||
|
||||
for model in args.models:
|
||||
dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval')
|
||||
eval_dataloader = dataset.create_tuple_iterator(output_numpy=True)
|
||||
eval_dataloader = dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
|
||||
network = vgg16(args.num_classes, args, phase="test")
|
||||
|
||||
# pre_trained
|
||||
|
|
|
@ -299,7 +299,7 @@ def test():
|
|||
|
||||
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
|
||||
args.logger.info('Start inference....')
|
||||
for i, data in enumerate(ds.create_dict_iterator()):
|
||||
for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
|
||||
image = data["image"]
|
||||
|
||||
image_shape = data["image_shape"]
|
||||
|
|
|
@ -239,7 +239,7 @@ def train():
|
|||
|
||||
old_progress = -1
|
||||
t_end = time.time()
|
||||
data_loader = ds.create_dict_iterator(output_numpy=True)
|
||||
data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
|
||||
|
||||
for i, data in enumerate(data_loader):
|
||||
images = data["image"]
|
||||
|
|
|
@ -305,7 +305,7 @@ def test():
|
|||
|
||||
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
|
||||
args.logger.info('Start inference....')
|
||||
for i, data in enumerate(ds.create_dict_iterator()):
|
||||
for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
|
||||
image = data["image"]
|
||||
|
||||
image_shape = data["image_shape"]
|
||||
|
|
|
@ -224,7 +224,7 @@ def train():
|
|||
|
||||
old_progress = -1
|
||||
t_end = time.time()
|
||||
data_loader = ds.create_dict_iterator(output_numpy=True)
|
||||
data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
|
||||
|
||||
shape_record = ShapeRecord()
|
||||
for i, data in enumerate(data_loader):
|
||||
|
|
|
@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path):
|
|||
print("\n========================================\n")
|
||||
print("total images num: ", total)
|
||||
print("Processing, please wait a moment.")
|
||||
for data in ds.create_dict_iterator(output_numpy=True):
|
||||
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
img_np = data['image']
|
||||
image_shape = data['image_shape']
|
||||
annotation = data['annotation']
|
||||
|
|
|
@ -119,7 +119,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy
|
|||
raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
|
||||
|
||||
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
|
||||
for data in dataset.create_dict_iterator():
|
||||
for data in dataset.create_dict_iterator(num_epochs=1):
|
||||
input_data = []
|
||||
for i in columns_list:
|
||||
input_data.append(data[i])
|
||||
|
|
|
@ -128,7 +128,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth
|
|||
raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
|
||||
|
||||
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
|
||||
for data in dataset.create_dict_iterator():
|
||||
for data in dataset.create_dict_iterator(num_epochs=1):
|
||||
input_data = []
|
||||
for i in columns_list:
|
||||
input_data.append(data[i])
|
||||
|
|
|
@ -109,7 +109,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
|
|||
output = []
|
||||
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
|
||||
columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
|
||||
for data in dataset.create_dict_iterator():
|
||||
for data in dataset.create_dict_iterator(num_epochs=1):
|
||||
input_data = []
|
||||
for i in columns_list:
|
||||
input_data.append(data[i])
|
||||
|
|
|
@ -107,7 +107,7 @@ def transformer_infer(config, dataset):
|
|||
probs = []
|
||||
source_sentences = []
|
||||
target_sentences = []
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True):
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
source_sentences.append(batch["source_eos_ids"])
|
||||
target_sentences.append(batch["target_eos_ids"])
|
||||
|
||||
|
@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset):
|
|||
lengths = []
|
||||
source_sentences = []
|
||||
target_sentences = []
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True):
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
source_sentences.append(batch["source_eos_ids"])
|
||||
target_sentences.append(batch["target_eos_ids"])
|
||||
|
||||
|
|
|
@ -278,7 +278,7 @@ def do_eval_standalone():
|
|||
|
||||
callback = Accuracy()
|
||||
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
|
||||
for data in eval_dataset.create_dict_iterator():
|
||||
for data in eval_dataset.create_dict_iterator(num_epochs=1):
|
||||
input_data = []
|
||||
for i in columns_list:
|
||||
input_data.append(data[i])
|
||||
|
|
|
@ -93,7 +93,7 @@ class EvalCallBack(Callback):
|
|||
if cb_params.cur_step_num % 100 == 0:
|
||||
callback = Accuracy()
|
||||
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
|
||||
for data in self.dataset.create_dict_iterator():
|
||||
for data in self.dataset.create_dict_iterator(num_epochs=1):
|
||||
input_data = []
|
||||
for i in columns_list:
|
||||
input_data.append(data[i])
|
||||
|
|
|
@ -113,7 +113,7 @@ def run_transformer_eval():
|
|||
predictions = []
|
||||
source_sents = []
|
||||
target_sents = []
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True):
|
||||
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
source_sents.append(batch["source_eos_ids"])
|
||||
target_sents.append(batch["target_eos_ids"])
|
||||
source_ids = Tensor(batch["source_eos_ids"], mstype.int32)
|
||||
|
|
|
@ -22,7 +22,7 @@ def create_dataset(data_file):
|
|||
num_parallel_workers=num_readers,
|
||||
shuffle=True)
|
||||
index = 0
|
||||
for item in data_set.create_dict_iterator(output_numpy=True):
|
||||
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
print("example {}: {}".format(index, item))
|
||||
index += 1
|
||||
if index % 1000 == 0:
|
||||
|
|
|
@ -28,7 +28,7 @@ args = parser.parse_args()
|
|||
|
||||
data_set = ds.MindDataset(args.path)
|
||||
num_iter = 0
|
||||
for item in data_set.create_dict_iterator(output_numpy=True):
|
||||
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
print(item)
|
||||
num_iter += 1
|
||||
print("Total items # is {}".format(num_iter))
|
||||
|
|
|
@ -22,7 +22,7 @@ def create_dataset(data_file):
|
|||
num_parallel_workers=num_readers,
|
||||
shuffle=True)
|
||||
index = 0
|
||||
for item in data_set.create_dict_iterator(output_numpy=True):
|
||||
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
print("example {}: {}".format(index, item))
|
||||
index += 1
|
||||
if index % 1000 == 0:
|
||||
|
|
|
@ -22,7 +22,7 @@ def create_dataset(data_file):
|
|||
num_parallel_workers=num_readers,
|
||||
shuffle=True)
|
||||
index = 0
|
||||
for item in data_set.create_dict_iterator(output_numpy=True):
|
||||
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
print("example {}: {}".format(index, item))
|
||||
index += 1
|
||||
if index % 1000 == 0:
|
||||
|
|
|
@ -55,7 +55,7 @@ class MindData:
|
|||
self.send_epoch_end = send_epoch_end
|
||||
return self
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self.__iter__()
|
||||
|
||||
def send(self, num_epochs=-1):
|
||||
|
|
|
@ -48,7 +48,7 @@ def use_minddataset(mindrecord):
|
|||
columns_list=columns_list,
|
||||
num_parallel_workers=4)
|
||||
num_iter = 0
|
||||
for _ in data_set.create_dict_iterator():
|
||||
for _ in data_set.create_dict_iterator(num_epochs=1):
|
||||
num_iter += 1
|
||||
print_log(num_iter)
|
||||
end = time.time()
|
||||
|
@ -64,7 +64,7 @@ def use_tfrecorddataset(tfrecord):
|
|||
shuffle=ds.Shuffle.GLOBAL)
|
||||
data_set = data_set.shuffle(10000)
|
||||
num_iter = 0
|
||||
for _ in data_set.create_dict_iterator():
|
||||
for _ in data_set.create_dict_iterator(num_epochs=1):
|
||||
num_iter += 1
|
||||
print_log(num_iter)
|
||||
end = time.time()
|
||||
|
|
|
@ -96,7 +96,7 @@ if __name__ == '__main__':
|
|||
dataset_types, dataset_shapes, (), 'dataset')
|
||||
ds1.send()
|
||||
|
||||
for data in data_set.create_tuple_iterator(output_numpy=True):
|
||||
for data in data_set.create_tuple_iterator(output_numpy=True, num_epochs=1):
|
||||
output = net()
|
||||
print(data[0].any())
|
||||
print(
|
||||
|
|
|
@ -92,7 +92,7 @@ class BNNLeNet5(nn.Cell):
|
|||
def train_model(train_net, net, dataset):
|
||||
accs = []
|
||||
loss_sum = 0
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
loss = train_net(train_x, label)
|
||||
|
@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):
|
|||
|
||||
def validate_model(net, dataset):
|
||||
accs = []
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
output = net(train_x)
|
||||
|
|
|
@ -122,7 +122,7 @@ def test_svi_cvae():
|
|||
sample_label = Tensor([i for i in range(0, 8)] * 8, dtype=mstype.int32)
|
||||
generated_sample = cvae.generate_sample(sample_label, 64, IMAGE_SHAPE)
|
||||
# test function: reconstruct_sample
|
||||
for sample in ds_train.create_dict_iterator(output_numpy=True):
|
||||
for sample in ds_train.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
sample_x = Tensor(sample['image'], dtype=mstype.float32)
|
||||
sample_y = Tensor(sample['label'], dtype=mstype.int32)
|
||||
reconstructed_sample = cvae.reconstruct_sample(sample_x, sample_y)
|
||||
|
|
|
@ -110,7 +110,7 @@ def test_svi_vae():
|
|||
# test function: generate_sample
|
||||
generated_sample = vae.generate_sample(64, IMAGE_SHAPE)
|
||||
# test function: reconstruct_sample
|
||||
for sample in ds_train.create_dict_iterator(output_numpy=True):
|
||||
for sample in ds_train.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
sample_x = Tensor(sample['image'], dtype=mstype.float32)
|
||||
reconstructed_sample = vae.reconstruct_sample(sample_x)
|
||||
print('The loss of the trained network is ', trained_loss)
|
||||
|
|
|
@ -129,7 +129,7 @@ if __name__ == '__main__':
|
|||
epi_uncer_model_path=None,
|
||||
ale_uncer_model_path=None,
|
||||
save_model=False)
|
||||
for eval_data in ds_eval.create_dict_iterator(output_numpy=True):
|
||||
for eval_data in ds_eval.create_dict_iterator(output_numpy=True, num_epochs=1):
|
||||
eval_data = Tensor(eval_data['image'], mstype.float32)
|
||||
epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data)
|
||||
aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data)
|
||||
|
|
|
@ -93,7 +93,7 @@ class LeNet5(nn.Cell):
|
|||
def train_model(train_net, net, dataset):
|
||||
accs = []
|
||||
loss_sum = 0
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
loss = train_net(train_x, label)
|
||||
|
@ -110,7 +110,7 @@ def train_model(train_net, net, dataset):
|
|||
|
||||
def validate_model(net, dataset):
|
||||
accs = []
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
output = net(train_x)
|
||||
|
|
|
@ -92,7 +92,7 @@ class LeNet5(nn.Cell):
|
|||
def train_model(train_net, net, dataset):
|
||||
accs = []
|
||||
loss_sum = 0
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
loss = train_net(train_x, label)
|
||||
|
@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):
|
|||
|
||||
def validate_model(net, dataset):
|
||||
accs = []
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
|
||||
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
train_x = Tensor(data['image'].astype(np.float32))
|
||||
label = Tensor(data['label'].astype(np.int32))
|
||||
output = net(train_x)
|
||||
|
|
|
@ -60,7 +60,7 @@ class MindData:
|
|||
def output_shapes(self):
|
||||
return self._output_shapes
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self
|
||||
|
||||
@property
|
||||
|
|
|
@ -418,7 +418,7 @@ def test_pynative_resnet50():
|
|||
max_step = 21
|
||||
exceed_num = 0
|
||||
data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size)
|
||||
for element in data_set.create_dict_iterator():
|
||||
for element in data_set.create_dict_iterator(num_epochs=1):
|
||||
step = step + 1
|
||||
if step > max_step:
|
||||
break
|
||||
|
@ -434,4 +434,3 @@ def test_pynative_resnet50():
|
|||
if step > 1 and cost_time > 0.25:
|
||||
exceed_num = exceed_num + 1
|
||||
assert exceed_num < 20
|
||||
|
|
@ -138,8 +138,8 @@ def test_random_posterize_default_c_md5(plot=False, run_golden=True):
|
|||
|
||||
image_posterize = []
|
||||
image_original = []
|
||||
for item1, item2 in zip(data1.create_dict_iterator(output_numpy=True),
|
||||
data2.create_dict_iterator(output_numpy=True)):
|
||||
for item1, item2 in zip(data1.create_dict_iterator(output_numpy=True, num_epochs=1),
|
||||
data2.create_dict_iterator(output_numpy=True, num_epochs=1)):
|
||||
image1 = item1["image"]
|
||||
image2 = item2["image"]
|
||||
image_posterize.append(image1)
|
||||
|
|
|
@ -146,7 +146,7 @@ class DatasetLenet():
|
|||
def get_repeat_count(self):
|
||||
return 1
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self
|
||||
|
||||
def test_double_subgraphs_train():
|
||||
|
|
|
@ -275,7 +275,7 @@ class DatasetLenet():
|
|||
def get_repeat_count(self):
|
||||
return 1
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self
|
||||
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ class DatasetLenet():
|
|||
def get_repeat_count(self):
|
||||
return 1
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self
|
||||
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ class Dataset():
|
|||
def get_repeat_count(self):
|
||||
return 1
|
||||
|
||||
def create_tuple_iterator(self):
|
||||
def create_tuple_iterator(self, num_epochs=-1):
|
||||
return self
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue