refactoring code directory for vgg16 and lstm

This commit is contained in:
caojian05 2020-06-16 11:22:52 +08:00
parent f48ba77648
commit 55b1d6feef
16 changed files with 266 additions and 34 deletions

View File

@@ -21,8 +21,8 @@ import os
import numpy as np
from config import lstm_cfg as cfg
from dataset import create_dataset, convert_to_mindrecord
from src.config import lstm_cfg as cfg
from src.dataset import lstm_create_dataset, convert_to_mindrecord
from mindspore import Tensor, nn, Model, context
from mindspore.model_zoo.lstm import SentimentNet
from mindspore.nn import Accuracy
@@ -71,7 +71,7 @@ if __name__ == '__main__':
model = Model(network, loss, opt, {'acc': Accuracy()})
print("============== Starting Testing ==============")
ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False)
ds_eval = lstm_create_dataset(args.preprocess_path, cfg.batch_size, training=False)
param_dict = load_checkpoint(args.ckpt_path)
load_param_into_net(network, param_dict)
if args.device_target == "CPU":

View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

View File

@@ -19,12 +19,12 @@ import os
import numpy as np
from imdb import ImdbParser
import mindspore.dataset as ds
from mindspore.mindrecord import FileWriter
from .imdb import ImdbParser
def create_dataset(data_home, batch_size, repeat_num=1, training=True):
def lstm_create_dataset(data_home, batch_size, repeat_num=1, training=True):
"""Data operations."""
ds.config.set_seed(1)
data_dir = os.path.join(data_home, "aclImdb_train.mindrecord0")
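The dataset factory is renamed from `create_dataset` to `lstm_create_dataset` and now sits behind a package-relative import, so every call site changes with it. A minimal usage sketch under that assumption, run from `model_zoo/lstm`; the path and batch size below are placeholders, not values from this commit:

```python
# Hypothetical call site after this refactor (placeholder path and batch size).
from src.dataset import lstm_create_dataset

ds_train = lstm_create_dataset("./preprocess", batch_size=64, repeat_num=1, training=True)
print(ds_train.get_dataset_size())  # batches per epoch
```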

View File

@@ -0,0 +1,93 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""LSTM."""
import numpy as np
from mindspore import Tensor, nn, context
from mindspore.ops import operations as P
# Initialize short-term memory (h) and long-term memory (c) to 0
def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
"""init default input."""
num_directions = 1
if bidirectional:
num_directions = 2
if context.get_context("device_target") == "CPU":
h_list = []
c_list = []
i = 0
while i < num_layers:
hi = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
h_list.append(hi)
ci = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
c_list.append(ci)
i = i + 1
h = tuple(h_list)
c = tuple(c_list)
return h, c
h = Tensor(
np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
c = Tensor(
np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
return h, c
class SentimentNet(nn.Cell):
"""Sentiment network structure."""
def __init__(self,
vocab_size,
embed_size,
num_hiddens,
num_layers,
bidirectional,
num_classes,
weight,
batch_size):
super(SentimentNet, self).__init__()
# Map words to vectors
self.embedding = nn.Embedding(vocab_size,
embed_size,
embedding_table=weight)
self.embedding.embedding_table.requires_grad = False
self.trans = P.Transpose()
self.perm = (1, 0, 2)
self.encoder = nn.LSTM(input_size=embed_size,
hidden_size=num_hiddens,
num_layers=num_layers,
has_bias=True,
bidirectional=bidirectional,
dropout=0.0)
self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)
self.concat = P.Concat(1)
if bidirectional:
self.decoder = nn.Dense(num_hiddens * 4, num_classes)
else:
self.decoder = nn.Dense(num_hiddens * 2, num_classes)
def construct(self, inputs):
# input(64,500,300)
embeddings = self.embedding(inputs)
embeddings = self.trans(embeddings, self.perm)
output, _ = self.encoder(embeddings, (self.h, self.c))
# states[i] size(64,200) -> encoding.size(64,400)
encoding = self.concat((output[0], output[-1]))
outputs = self.decoder(encoding)
return outputs
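With `SentimentNet` moved out of `mindspore.model_zoo.lstm` into `src/lstm.py`, the state initializer can be exercised directly from the new path. A minimal sketch, assuming it runs from `model_zoo/lstm`; all sizes below are placeholders:

```python
# Shape check for lstm_default_state above (sizes are placeholders).
from mindspore import context
from src.lstm import lstm_default_state

context.set_context(device_target="CPU")
# The CPU branch returns per-layer tuples; other targets get one stacked tensor each.
h, c = lstm_default_state(batch_size=64, hidden_size=100, num_layers=2, bidirectional=True)
print(len(h), h[0].shape)  # 2 layers, each (2, 64, 100) since bidirectional
```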

View File

@@ -21,9 +21,9 @@ import os
import numpy as np
from config import lstm_cfg as cfg
from dataset import convert_to_mindrecord
from dataset import create_dataset
from src.config import lstm_cfg as cfg
from src.dataset import convert_to_mindrecord
from src.dataset import lstm_create_dataset
from mindspore import Tensor, nn, Model, context
from mindspore.model_zoo.lstm import SentimentNet
from mindspore.nn import Accuracy
@@ -71,7 +71,7 @@ if __name__ == '__main__':
model = Model(network, loss, opt, {'acc': Accuracy()})
print("============== Starting Training ==============")
ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)

View File

@@ -98,7 +98,7 @@ parameters/options:
### Distribute Training
```
Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]
Usage: sh script/run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]
parameters/options:
MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path.

View File

@@ -17,14 +17,15 @@
python eval.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
"""
import argparse
import mindspore.nn as nn
from mindspore import context
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.model_zoo.vgg import vgg16
from config import cifar_cfg as cfg
import dataset
from src.config import cifar_cfg as cfg
from src.dataset import vgg_create_dataset
from src.vgg import vgg16
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Cifar10 classification')
@@ -47,6 +48,6 @@ if __name__ == '__main__':
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)
dataset = dataset.create_dataset(args_opt.data_path, 1, False)
dataset = vgg_create_dataset(args_opt.data_path, 1, False)
res = model.eval(dataset)
print("result: ", res)

View File

@@ -15,39 +15,38 @@
# ============================================================================
if [ $# != 2 ]
then
echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]"
exit 1
fi
if [ ! -f $1 ]
then
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
exit 1
fi
if [ ! -d $2 ]
then
echo "error: DATA_PATH=$2 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$1
for((i=0; i<${DEVICE_NUM}; i++))
for((i=0;i<RANK_SIZE;i++))
do
export DEVICE_ID=$i
export RANK_ID=$i
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp *.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"
env > env.log
python train.py --data_path=$2 --device_id=$i &> log &
cd ..
done

View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

View File

@@ -16,13 +16,15 @@
Data operations, to be used in train.py and eval.py
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision
import mindspore.common.dtype as mstype
from config import cifar_cfg as cfg
from .config import cifar_cfg as cfg
def create_dataset(data_home, repeat_num=1, training=True):
def vgg_create_dataset(data_home, repeat_num=1, training=True):
"""Data operations."""
ds.config.set_seed(1)
data_dir = os.path.join(data_home, "cifar-10-batches-bin")
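The CIFAR-10 pipeline gets the same treatment: `create_dataset` becomes `vgg_create_dataset` behind a relative import. A hedged sketch of the new call site, with a placeholder data directory:

```python
# Hypothetical call site after this refactor; "./cifar10" is a placeholder
# directory expected to contain cifar-10-batches-bin.
from src.dataset import vgg_create_dataset

ds_eval = vgg_create_dataset("./cifar10", repeat_num=1, training=False)
print(ds_eval.get_dataset_size())
```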

model_zoo/vgg16/src/vgg.py (Normal file, 104 lines)
View File

@@ -0,0 +1,104 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""VGG."""
import mindspore.nn as nn
from mindspore.common.initializer import initializer
import mindspore.common.dtype as mstype
def _make_layer(base, batch_norm):
"""Make stage network of VGG."""
layers = []
in_channels = 3
for v in base:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
weight_shape = (v, in_channels, 3, 3)
weight = initializer('XavierUniform', shape=weight_shape, dtype=mstype.float32).to_tensor()
conv2d = nn.Conv2d(in_channels=in_channels,
out_channels=v,
kernel_size=3,
padding=0,
pad_mode='same',
weight_init=weight)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()]
else:
layers += [conv2d, nn.ReLU()]
in_channels = v
return nn.SequentialCell(layers)
class Vgg(nn.Cell):
"""
VGG network definition.
Args:
base (list): Configuration for different layers, mainly the channel number of Conv layer.
num_classes (int): Class numbers. Default: 1000.
batch_norm (bool): Whether to do the batchnorm. Default: False.
batch_size (int): Batch size. Default: 1.
Returns:
Tensor, infer output tensor.
Examples:
>>> Vgg([64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
>>> num_classes=1000, batch_norm=False, batch_size=1)
"""
def __init__(self, base, num_classes=1000, batch_norm=False, batch_size=1):
super(Vgg, self).__init__()
_ = batch_size
self.layers = _make_layer(base, batch_norm=batch_norm)
self.flatten = nn.Flatten()
self.classifier = nn.SequentialCell([
nn.Dense(512 * 7 * 7, 4096),
nn.ReLU(),
nn.Dense(4096, 4096),
nn.ReLU(),
nn.Dense(4096, num_classes)])
def construct(self, x):
x = self.layers(x)
x = self.flatten(x)
x = self.classifier(x)
return x
cfg = {
'11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def vgg16(num_classes=1000):
"""
Get Vgg16 neural network with batch normalization.
Args:
num_classes (int): Class numbers. Default: 1000.
Returns:
Cell, cell instance of Vgg16 neural network with batch normalization.
Examples:
>>> vgg16(num_classes=1000)
"""
net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True)
return net
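A quick forward-shape check of the new `src/vgg.py` module, as a sketch. The classifier's first `Dense` expects a 512*7*7 flatten, which matches a 224x224 input after five 2x2 max-pools; the batch size and class count below are placeholders:

```python
# Forward-shape check for the Vgg definition above (224x224 input geometry assumed).
import numpy as np
from mindspore import Tensor
from src.vgg import vgg16  # import path per this commit

net = vgg16(num_classes=10)
x = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # expected: (1, 10)
```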

View File

@@ -19,20 +19,24 @@ python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
import argparse
import os
import random
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.communication.management import init
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model, ParallelMode
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.model_zoo.vgg import vgg16
from dataset import create_dataset
from config import cifar_cfg as cfg
from mindspore.train.model import Model, ParallelMode
from src.config import cifar_cfg as cfg
from src.dataset import vgg_create_dataset
from src.vgg import vgg16
random.seed(1)
np.random.seed(1)
def lr_steps(global_step, lr_max=None, total_epochs=None, steps_per_epoch=None):
"""Set learning rate."""
lr_each_step = []
@@ -72,12 +76,13 @@ if __name__ == '__main__':
mirror_mean=True)
init()
dataset = create_dataset(args_opt.data_path, cfg.epoch_size)
dataset = vgg_create_dataset(args_opt.data_path, cfg.epoch_size)
batch_num = dataset.get_dataset_size()
net = vgg16(num_classes=cfg.num_classes)
lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
weight_decay=cfg.weight_decay)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)