diff --git a/model_zoo/official/cv/warpctc/src/dataset.py b/model_zoo/official/cv/warpctc/src/dataset.py
index 12caa32334e..12b75bd3bba 100755
--- a/model_zoo/official/cv/warpctc/src/dataset.py
+++ b/model_zoo/official/cv/warpctc/src/dataset.py
@@ -61,12 +61,18 @@ class _CaptchaDataset:
         return image, label
 
 
+def transpose_hwc2whc(image):
+    """transpose image from HWC to WHC"""
+    image = np.transpose(image, (1, 0, 2))
+    return image
+
+
 def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_target='Ascend'):
     """
     create train or evaluation dataset for warpctc
 
     Args:
-        dataset_path(int): dataset path
+        dataset_path(str): dataset path
         batch_size(int): batch size of generated dataset, default is 1
        num_shards(int): number of devices
        shard_id(int): rank id
@@ -79,12 +85,13 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_
         vc.Rescale(1.0 / 255.0, 0.0),
         vc.Normalize([0.9010, 0.9049, 0.9025], std=[0.1521, 0.1347, 0.1458]),
         vc.Resize((m.ceil(cf.captcha_height / 16) * 16, cf.captcha_width)),
-        vc.HWC2CHW()
+        c.TypeCast(mstype.float16)
     ]
     label_trans = [
         c.TypeCast(mstype.int32)
     ]
     ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8)
+    ds = ds.map(operations=transpose_hwc2whc, input_columns=["image"], num_parallel_workers=8)
     ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8)
     ds = ds.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/warpctc/src/metric.py b/model_zoo/official/cv/warpctc/src/metric.py
index d535b9a4c44..8e4d1712101 100755
--- a/model_zoo/official/cv/warpctc/src/metric.py
+++ b/model_zoo/official/cv/warpctc/src/metric.py
@@ -50,7 +50,7 @@ class WarpCTCAccuracy(nn.Metric):
 
     def eval(self):
         if self._total_num == 0:
-            raise RuntimeError('Accuary can not be calculated, because the number of samples is 0.')
+            raise RuntimeError('Accuracy can not be calculated, because the number of samples is 0.')
         return self._correct_num / self._total_num
 
     def _is_eq(self, pred_lbl, target):
diff --git a/model_zoo/official/cv/warpctc/src/warpctc.py b/model_zoo/official/cv/warpctc/src/warpctc.py
index 3027999f833..73b5173c5d3 100755
--- a/model_zoo/official/cv/warpctc/src/warpctc.py
+++ b/model_zoo/official/cv/warpctc/src/warpctc.py
@@ -33,13 +33,13 @@ class StackedRNN(nn.Cell):
         hidden_size(int): the hidden size in LSTM layers, default is 512
     """
 
-    def __init__(self, input_size, batch_size=64, hidden_size=512):
+    def __init__(self, input_size, batch_size=64, hidden_size=512, num_class=11):
         super(StackedRNN, self).__init__()
-        self.batch_size = batch_size
         self.input_size = input_size
-        self.num_classes = 11
-        self.reshape = P.Reshape()
-        self.cast = P.Cast()
+        self.batch_size = batch_size
+        self.hidden_size = hidden_size
+        self.num_class = num_class
+
         k = (1 / hidden_size) ** 0.5
         self.rnn1 = P.DynamicRNN(forget_bias=0.0)
@@ -58,32 +58,23 @@ class StackedRNN(nn.Cell):
         self.c1 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
         self.c2 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
 
-        self.fc_weight = np.random.random((self.num_classes, hidden_size)).astype(np.float32)
-        self.fc_bias = np.random.random(self.num_classes).astype(np.float32)
+        self.fc_weight = Tensor(np.random.random((hidden_size, num_class)).astype(np.float16))
+        self.fc_bias = Tensor(np.random.random(self.num_class).astype(np.float16))
 
-        self.fc = nn.Dense(in_channels=hidden_size, out_channels=self.num_classes, weight_init=Tensor(self.fc_weight),
-                           bias_init=Tensor(self.fc_bias))
-
-        self.fc.to_float(mstype.float32)
-        self.expand_dims = P.ExpandDims()
-        self.concat = P.Concat()
+        self.cast = P.Cast()
+        self.reshape = P.Reshape()
         self.transpose = P.Transpose()
-        self.squeeze = P.Squeeze(axis=0)
+        self.matmul = nn.MatMul()
 
     def construct(self, x):
-        x = self.cast(x, mstype.float16)
-        x = self.transpose(x, (3, 0, 2, 1))
+        x = self.transpose(x, (1, 0, 2, 3))
         x = self.reshape(x, (-1, self.batch_size, self.input_size))
         y1, _, _, _, _, _, _, _ = self.rnn1(x, self.w1, self.b1, None, self.h1, self.c1)
         y2, _, _, _, _, _, _, _ = self.rnn2(y1, self.w2, self.b2, None, self.h2, self.c2)
-        output = ()
-        for i in range(F.shape(x)[0]):
-            y2_after_fc = self.fc(self.squeeze(y2[i:i + 1:1]))
-            y2_after_fc = self.expand_dims(y2_after_fc, 0)
-            output += (y2_after_fc,)
-        output = self.concat(output)
+        # [time_step, bs, hidden_size] * [hidden_size, num_class] + [num_class]
+        output = self.matmul(y2, self.fc_weight) + self.fc_bias
         return output
diff --git a/model_zoo/official/cv/warpctc/train.py b/model_zoo/official/cv/warpctc/train.py
index 07614851915..096d8535154 100755
--- a/model_zoo/official/cv/warpctc/train.py
+++ b/model_zoo/official/cv/warpctc/train.py
@@ -51,14 +51,11 @@ if args_opt.platform == 'Ascend':
 if __name__ == '__main__':
     lr_scale = 1
     if args_opt.run_distribute:
+        init()
         if args_opt.platform == 'Ascend':
-            init()
-            lr_scale = 1
             device_num = int(os.environ.get("RANK_SIZE"))
             rank = int(os.environ.get("RANK_ID"))
         else:
-            init()
-            lr_scale = 1
             device_num = get_group_size()
             rank = get_rank()
         context.reset_auto_parallel_context()
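
Note (not part of the patch): with this change the dataset emits WHC images already cast to float16, so the network only needs a (1, 0, 2, 3) transpose instead of the previous cast plus (3, 0, 2, 1) transpose, and the per-timestep nn.Dense loop is replaced by one batched matmul. The sketch below is a plain numpy approximation of the intended shape flow under assumed sizes (64x160 RGB captchas, batch_size=64, hidden_size=512, num_class=11); the actual code uses the MindSpore ops shown in the diff.

# Minimal numpy sketch of the shape flow after this change (assumed sizes, not the real ops).
import numpy as np

batch_size, height, width, channels = 64, 64, 160, 3
hidden_size, num_class = 512, 11

# dataset.py: transpose_hwc2whc turns each HWC image into WHC, so width (the
# left-to-right reading direction) later becomes the time-step axis.
image_hwc = np.random.rand(height, width, channels).astype(np.float16)
image_whc = np.transpose(image_hwc, (1, 0, 2))              # (W, H, C)

# warpctc.py construct(): batch of WHC images -> (time_step, batch, input_size)
batch = np.stack([image_whc] * batch_size)                  # (N, W, H, C)
x = np.transpose(batch, (1, 0, 2, 3))                       # (W, N, H, C)
x = x.reshape(-1, batch_size, height * channels)            # (time_step, N, input_size)

# The per-timestep Dense loop is replaced by one batched matmul over the LSTM output:
# [time_step, N, hidden_size] @ [hidden_size, num_class] + [num_class]
y2 = np.random.rand(x.shape[0], batch_size, hidden_size).astype(np.float16)
fc_weight = np.random.rand(hidden_size, num_class).astype(np.float16)
fc_bias = np.random.rand(num_class).astype(np.float16)
output = y2 @ fc_weight + fc_bias                           # (time_step, N, num_class)
print(output.shape)                                         # (160, 64, 11)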