Add num_epochs to non-sink training

This commit is contained in:
hesham 2020-09-10 13:05:24 -04:00
parent e69d868016
commit 9cee0d2143
40 changed files with 50 additions and 49 deletions

View File

@ -63,7 +63,7 @@ class SVI:
for _ in range(1, epochs+1):
train_loss = 0
dataset_size = 0
for data in train_dataset.create_dict_iterator():
for data in train_dataset.create_dict_iterator(num_epochs=1):
x = Tensor(data['image'], dtype=mstype.float32)
y = Tensor(data['label'], dtype=mstype.int32)
dataset_size += len(x)

View File

@ -145,7 +145,7 @@ class DatasetHelper:
self.iter = iterclass(dataset, sink_size, epoch_num)
else:
iterclass = _DatasetIterNormal
self.iter = iterclass(dataset)
self.iter = iterclass(dataset, epoch_num=epoch_num)
def __iter__(self):
return self.iter.__iter__()
@ -290,11 +290,12 @@ class _DatasetIterPSLite(_DatasetIter):
class _DatasetIterNormal:
"""Iter for normal(non sink) mode, feed the data from host."""
def __init__(self, dataset):
def __init__(self, dataset, epoch_num=-1):
self.dataset = dataset
self.device_num = _get_device_num()
self.global_rank = _get_global_rank()
self.iter = self.dataset.create_tuple_iterator()
self.iter = self.dataset.create_tuple_iterator(num_epochs=epoch_num)
def __iter__(self):
return self

View File

@ -460,7 +460,8 @@ class Model:
is_train=True,
phase='train',
dataset=train_dataset,
dataset_sink_mode=False)
dataset_sink_mode=False,
epoch_num=epoch)
cb_params.cur_step_num = 0
run_context = RunContext(cb_params)
list_callback.begin(run_context)

View File

@ -57,7 +57,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):
print("total images num: ", total)
print("Processing, please wait a moment.")
max_num = 128
for data in ds.create_dict_iterator():
for data in ds.create_dict_iterator(num_epochs=1):
eval_iter = eval_iter + 1
img_data = data['image']

View File

@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file):
print("total images num: ", total)
print("Processing, please wait a moment.")
max_num = 128
for data in ds.create_dict_iterator(output_numpy=True):
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
eval_iter = eval_iter + 1
img_data = data['image']

View File

@ -200,7 +200,7 @@ def test(cloud_args=None):
per_batch_size=args.per_batch_size,
max_epoch=1, rank=args.rank, group_size=args.group_size,
mode='eval')
eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
if network is None:
raise NotImplementedError('not implement {}'.format(args.backbone))

View File

@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path):
print("\n========================================\n")
print("total images num: ", total)
print("Processing, please wait a moment.")
for data in ds.create_dict_iterator(output_numpy=True):
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
img_id = data['img_id']
img_np = data['image']
image_shape = data['image_shape']

View File

@ -159,7 +159,7 @@ def test(cloud_args=None):
for model in args.models:
dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval')
eval_dataloader = dataset.create_tuple_iterator(output_numpy=True)
eval_dataloader = dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
network = vgg16(args.num_classes, args, phase="test")
# pre_trained

View File

@ -299,7 +299,7 @@ def test():
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
args.logger.info('Start inference....')
for i, data in enumerate(ds.create_dict_iterator()):
for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
image = data["image"]
image_shape = data["image_shape"]

View File

@ -239,7 +239,7 @@ def train():
old_progress = -1
t_end = time.time()
data_loader = ds.create_dict_iterator(output_numpy=True)
data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
for i, data in enumerate(data_loader):
images = data["image"]

View File

@ -305,7 +305,7 @@ def test():
input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
args.logger.info('Start inference....')
for i, data in enumerate(ds.create_dict_iterator()):
for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
image = data["image"]
image_shape = data["image_shape"]

View File

@ -224,7 +224,7 @@ def train():
old_progress = -1
t_end = time.time()
data_loader = ds.create_dict_iterator(output_numpy=True)
data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
shape_record = ShapeRecord()
for i, data in enumerate(data_loader):

View File

@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path):
print("\n========================================\n")
print("total images num: ", total)
print("Processing, please wait a moment.")
for data in ds.create_dict_iterator(output_numpy=True):
for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
img_np = data['image']
image_shape = data['image_shape']
annotation = data['annotation']

View File

@ -119,7 +119,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy
raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
for data in dataset.create_dict_iterator():
for data in dataset.create_dict_iterator(num_epochs=1):
input_data = []
for i in columns_list:
input_data.append(data[i])

View File

@ -128,7 +128,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth
raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
for data in dataset.create_dict_iterator():
for data in dataset.create_dict_iterator(num_epochs=1):
input_data = []
for i in columns_list:
input_data.append(data[i])

View File

@ -109,7 +109,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
output = []
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
for data in dataset.create_dict_iterator():
for data in dataset.create_dict_iterator(num_epochs=1):
input_data = []
for i in columns_list:
input_data.append(data[i])

View File

@ -107,7 +107,7 @@ def transformer_infer(config, dataset):
probs = []
source_sentences = []
target_sentences = []
for batch in dataset.create_dict_iterator(output_numpy=True):
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
source_sentences.append(batch["source_eos_ids"])
target_sentences.append(batch["target_eos_ids"])
@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset):
lengths = []
source_sentences = []
target_sentences = []
for batch in dataset.create_dict_iterator(output_numpy=True):
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
source_sentences.append(batch["source_eos_ids"])
target_sentences.append(batch["target_eos_ids"])

View File

@ -278,7 +278,7 @@ def do_eval_standalone():
callback = Accuracy()
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
for data in eval_dataset.create_dict_iterator():
for data in eval_dataset.create_dict_iterator(num_epochs=1):
input_data = []
for i in columns_list:
input_data.append(data[i])

View File

@ -93,7 +93,7 @@ class EvalCallBack(Callback):
if cb_params.cur_step_num % 100 == 0:
callback = Accuracy()
columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
for data in self.dataset.create_dict_iterator():
for data in self.dataset.create_dict_iterator(num_epochs=1):
input_data = []
for i in columns_list:
input_data.append(data[i])

View File

@ -113,7 +113,7 @@ def run_transformer_eval():
predictions = []
source_sents = []
target_sents = []
for batch in dataset.create_dict_iterator(output_numpy=True):
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
source_sents.append(batch["source_eos_ids"])
target_sents.append(batch["target_eos_ids"])
source_ids = Tensor(batch["source_eos_ids"], mstype.int32)

View File

@ -22,7 +22,7 @@ def create_dataset(data_file):
num_parallel_workers=num_readers,
shuffle=True)
index = 0
for item in data_set.create_dict_iterator(output_numpy=True):
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
print("example {}: {}".format(index, item))
index += 1
if index % 1000 == 0:

View File

@ -28,7 +28,7 @@ args = parser.parse_args()
data_set = ds.MindDataset(args.path)
num_iter = 0
for item in data_set.create_dict_iterator(output_numpy=True):
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
print(item)
num_iter += 1
print("Total items # is {}".format(num_iter))

View File

@ -22,7 +22,7 @@ def create_dataset(data_file):
num_parallel_workers=num_readers,
shuffle=True)
index = 0
for item in data_set.create_dict_iterator(output_numpy=True):
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
print("example {}: {}".format(index, item))
index += 1
if index % 1000 == 0:

View File

@ -22,7 +22,7 @@ def create_dataset(data_file):
num_parallel_workers=num_readers,
shuffle=True)
index = 0
for item in data_set.create_dict_iterator(output_numpy=True):
for item in data_set.create_dict_iterator(output_numpy=True, num_epochs=1):
print("example {}: {}".format(index, item))
index += 1
if index % 1000 == 0:

View File

@ -55,7 +55,7 @@ class MindData:
self.send_epoch_end = send_epoch_end
return self
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self.__iter__()
def send(self, num_epochs=-1):

View File

@ -48,7 +48,7 @@ def use_minddataset(mindrecord):
columns_list=columns_list,
num_parallel_workers=4)
num_iter = 0
for _ in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator(num_epochs=1):
num_iter += 1
print_log(num_iter)
end = time.time()
@ -64,7 +64,7 @@ def use_tfrecorddataset(tfrecord):
shuffle=ds.Shuffle.GLOBAL)
data_set = data_set.shuffle(10000)
num_iter = 0
for _ in data_set.create_dict_iterator():
for _ in data_set.create_dict_iterator(num_epochs=1):
num_iter += 1
print_log(num_iter)
end = time.time()

View File

@ -96,7 +96,7 @@ if __name__ == '__main__':
dataset_types, dataset_shapes, (), 'dataset')
ds1.send()
for data in data_set.create_tuple_iterator(output_numpy=True):
for data in data_set.create_tuple_iterator(output_numpy=True, num_epochs=1):
output = net()
print(data[0].any())
print(

View File

@ -92,7 +92,7 @@ class BNNLeNet5(nn.Cell):
def train_model(train_net, net, dataset):
accs = []
loss_sum = 0
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
loss = train_net(train_x, label)
@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):
def validate_model(net, dataset):
accs = []
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
output = net(train_x)

View File

@ -122,7 +122,7 @@ def test_svi_cvae():
sample_label = Tensor([i for i in range(0, 8)] * 8, dtype=mstype.int32)
generated_sample = cvae.generate_sample(sample_label, 64, IMAGE_SHAPE)
# test function: reconstruct_sample
for sample in ds_train.create_dict_iterator(output_numpy=True):
for sample in ds_train.create_dict_iterator(output_numpy=True, num_epochs=1):
sample_x = Tensor(sample['image'], dtype=mstype.float32)
sample_y = Tensor(sample['label'], dtype=mstype.int32)
reconstructed_sample = cvae.reconstruct_sample(sample_x, sample_y)

View File

@ -110,7 +110,7 @@ def test_svi_vae():
# test function: generate_sample
generated_sample = vae.generate_sample(64, IMAGE_SHAPE)
# test function: reconstruct_sample
for sample in ds_train.create_dict_iterator(output_numpy=True):
for sample in ds_train.create_dict_iterator(output_numpy=True, num_epochs=1):
sample_x = Tensor(sample['image'], dtype=mstype.float32)
reconstructed_sample = vae.reconstruct_sample(sample_x)
print('The loss of the trained network is ', trained_loss)

View File

@ -129,7 +129,7 @@ if __name__ == '__main__':
epi_uncer_model_path=None,
ale_uncer_model_path=None,
save_model=False)
for eval_data in ds_eval.create_dict_iterator(output_numpy=True):
for eval_data in ds_eval.create_dict_iterator(output_numpy=True, num_epochs=1):
eval_data = Tensor(eval_data['image'], mstype.float32)
epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data)
aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data)

View File

@ -93,7 +93,7 @@ class LeNet5(nn.Cell):
def train_model(train_net, net, dataset):
accs = []
loss_sum = 0
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
loss = train_net(train_x, label)
@ -110,7 +110,7 @@ def train_model(train_net, net, dataset):
def validate_model(net, dataset):
accs = []
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
output = net(train_x)

View File

@ -92,7 +92,7 @@ class LeNet5(nn.Cell):
def train_model(train_net, net, dataset):
accs = []
loss_sum = 0
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
loss = train_net(train_x, label)
@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):
def validate_model(net, dataset):
accs = []
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
train_x = Tensor(data['image'].astype(np.float32))
label = Tensor(data['label'].astype(np.int32))
output = net(train_x)

View File

@ -60,7 +60,7 @@ class MindData:
def output_shapes(self):
return self._output_shapes
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self
@property

View File

@ -418,7 +418,7 @@ def test_pynative_resnet50():
max_step = 21
exceed_num = 0
data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size)
for element in data_set.create_dict_iterator():
for element in data_set.create_dict_iterator(num_epochs=1):
step = step + 1
if step > max_step:
break
@ -434,4 +434,3 @@ def test_pynative_resnet50():
if step > 1 and cost_time > 0.25:
exceed_num = exceed_num + 1
assert exceed_num < 20

View File

@ -138,8 +138,8 @@ def test_random_posterize_default_c_md5(plot=False, run_golden=True):
image_posterize = []
image_original = []
for item1, item2 in zip(data1.create_dict_iterator(output_numpy=True),
data2.create_dict_iterator(output_numpy=True)):
for item1, item2 in zip(data1.create_dict_iterator(output_numpy=True, num_epochs=1),
data2.create_dict_iterator(output_numpy=True, num_epochs=1)):
image1 = item1["image"]
image2 = item2["image"]
image_posterize.append(image1)

View File

@ -146,7 +146,7 @@ class DatasetLenet():
def get_repeat_count(self):
return 1
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self
def test_double_subgraphs_train():

View File

@ -275,7 +275,7 @@ class DatasetLenet():
def get_repeat_count(self):
return 1
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self

View File

@ -61,7 +61,7 @@ class DatasetLenet():
def get_repeat_count(self):
return 1
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self

View File

@ -59,7 +59,7 @@ class Dataset():
def get_repeat_count(self):
return 1
def create_tuple_iterator(self):
def create_tuple_iterator(self, num_epochs=-1):
return self