From d7f0c5064c1dfab8511b2e485361fdb3876494f6 Mon Sep 17 00:00:00 2001
From: yao_yf
Date: Fri, 29 May 2020 09:17:15 +0800
Subject: [PATCH] adjust dir

---
 model_zoo/wide_and_deep/README.md             |  93 +++++++++++
 model_zoo/wide_and_deep/src/callbacks.py      | 146 +++++++++---------
 .../wide_and_deep/{tools => src}/config.py    |   5 +-
 model_zoo/wide_and_deep/{ => src}/metrics.py  |   0
 model_zoo/wide_and_deep/src/wide_and_deep.py  |   4 +-
 model_zoo/wide_and_deep/test.py               |  12 +-
 model_zoo/wide_and_deep/train.py              |  16 +-
 .../{tools => }/train_and_test.py             |  14 +-
 .../wide_and_deep/train_and_test_multinpu.py  |   8 +-
 9 files changed, 196 insertions(+), 102 deletions(-)
 create mode 100644 model_zoo/wide_and_deep/README.md
 rename model_zoo/wide_and_deep/{tools => src}/config.py (97%)
 rename model_zoo/wide_and_deep/{ => src}/metrics.py (100%)
 rename model_zoo/wide_and_deep/{tools => }/train_and_test.py (87%)

diff --git a/model_zoo/wide_and_deep/README.md b/model_zoo/wide_and_deep/README.md
new file mode 100644
index 00000000000..f770297dd01
--- /dev/null
+++ b/model_zoo/wide_and_deep/README.md
@@ -0,0 +1,93 @@
+# Wide&Deep Recommendation Model
+## Overview
+This is an implementation of Wide&Deep as described in the [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792.pdf) paper.
+
+The Wide&Deep model jointly trains a wide linear model and a deep neural network, combining the benefits of memorization and generalization for recommender systems.
+
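+The core idea can be sketched in a few lines of plain NumPy (a minimal sketch with toy sizes and random weights, for illustration only; the actual MindSpore implementation lives in `src/wide_and_deep.py`):
+```
+import numpy as np
+
+# Toy sizes: Criteo has 39 feature fields; the vocab size and layer widths
+# below are made up for this example.
+field_size, vocab_size, emb_dim = 39, 200000, 80
+ids = np.random.randint(0, vocab_size, size=(2, field_size))  # a batch of 2 samples
+vals = np.ones((2, field_size))                               # per-field values
+
+# Wide part: one weight per feature id, i.e. a linear model (memorization).
+wide_w = np.random.randn(vocab_size, 1) * 0.01
+wide_logit = (wide_w[ids].squeeze(-1) * vals).sum(axis=1)
+
+# Deep part: dense embeddings fed through an MLP (generalization).
+emb_table = np.random.randn(vocab_size, emb_dim) * 0.01
+x = (emb_table[ids] * vals[..., None]).reshape(2, field_size * emb_dim)
+for out_dim in (1024, 512, 256, 128):
+    x = np.maximum(x @ (np.random.randn(x.shape[1], out_dim) * 0.01), 0.0)  # ReLU
+deep_logit = (x @ (np.random.randn(x.shape[1], 1) * 0.01)).squeeze(-1)
+
+# Joint prediction: the two logits are summed before the sigmoid, so both
+# parts are trained jointly against the same click label.
+prob = 1.0 / (1.0 + np.exp(-(wide_logit + deep_logit)))
+print(prob.shape)  # (2,)
+```
+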
+## Dataset
+The [Criteo datasets](http://labs.criteo.com/2014/02/download-kaggle-display-advertising-challenge-dataset/) are used for model training and evaluation.
+
+## Running Code
+
+### Download and preprocess dataset
+To download the dataset, please install the Pandas package first, then issue the following command:
+```
+bash download.sh
+```
+
+### Code Structure
+The entire code structure is as follows:
+```
+|--- wide_and_deep/
+    train_and_test.py            "Entrance of Wide&Deep model training and evaluation"
+    test.py                      "Entrance of Wide&Deep model evaluation"
+    train.py                     "Entrance of Wide&Deep model training"
+    train_and_test_multinpu.py   "Entrance of Wide&Deep model data parallel training and evaluation"
+    |--- src/                    "Source code of the model, dataset, configuration, callbacks and metrics"
+        config.py                "Parameter configuration"
+        datasets.py              "Dataset loader class"
+        wide_and_deep.py         "Model structure"
+        callbacks.py             "Callback classes for training and evaluation"
+        metrics.py               "Metric class"
+```
+
+### Train and evaluate model
+To train and evaluate the model, issue the following command:
+```
+python train_and_test.py
+```
+Arguments:
+  * `--data_path`: This should be set to the same directory given to the data download's data_dir argument.
+  * `--epochs`: Total number of training epochs.
+  * `--batch_size`: Training batch size.
+  * `--eval_batch_size`: Evaluation batch size.
+  * `--field_size`: The number of feature fields.
+  * `--vocab_size`: The total number of features in the dataset.
+  * `--emb_dim`: The dense embedding dimension of the sparse features.
+  * `--deep_layers_dim`: The dimensions of the deep layers.
+  * `--deep_layers_act`: The activation function used by the deep layers.
+  * `--keep_prob`: The keep rate of the dropout layers.
+  * `--ckpt_path`: The location of the checkpoint file.
+  * `--eval_file_name`: Evaluation output file.
+  * `--loss_file_name`: Loss output file.
+
+To train the model, issue the following command:
+```
+python train.py
+```
+Arguments:
+  * `--data_path`: This should be set to the same directory given to the data download's data_dir argument.
+  * `--epochs`: Total number of training epochs.
+  * `--batch_size`: Training batch size.
+  * `--eval_batch_size`: Evaluation batch size.
+  * `--field_size`: The number of feature fields.
+  * `--vocab_size`: The total number of features in the dataset.
+  * `--emb_dim`: The dense embedding dimension of the sparse features.
+  * `--deep_layers_dim`: The dimensions of the deep layers.
+  * `--deep_layers_act`: The activation function used by the deep layers.
+  * `--keep_prob`: The keep rate of the dropout layers.
+  * `--ckpt_path`: The location of the checkpoint file.
+  * `--eval_file_name`: Evaluation output file.
+  * `--loss_file_name`: Loss output file.
+
+To evaluate the model, issue the following command:
+```
+python test.py
+```
+Arguments:
+  * `--data_path`: This should be set to the same directory given to the data download's data_dir argument.
+  * `--epochs`: Total number of training epochs.
+  * `--batch_size`: Training batch size.
+  * `--eval_batch_size`: Evaluation batch size.
+  * `--field_size`: The number of feature fields.
+  * `--vocab_size`: The total number of features in the dataset.
+  * `--emb_dim`: The dense embedding dimension of the sparse features.
+  * `--deep_layers_dim`: The dimensions of the deep layers.
+  * `--deep_layers_act`: The activation function used by the deep layers.
+  * `--keep_prob`: The keep rate of the dropout layers.
+  * `--ckpt_path`: The location of the checkpoint file.
+  * `--eval_file_name`: Evaluation output file.
+  * `--loss_file_name`: Loss output file.
+
+There are other arguments that control the model and the training process; use the `--help` or `-h` flag to get a full list of possible arguments with detailed descriptions.
+
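+All of the entry scripts wire these pieces together in the same way. Condensed, the flow looks roughly like this (a sketch based on the sources in this patch, not a drop-in script; the `create_dataset` keyword arguments are illustrative, see `src/datasets.py` for the exact signature):
+```
+from mindspore import Model
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
+
+from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
+from src.callbacks import LossCallBack, EvalCallBack
+from src.datasets import create_dataset
+from src.metrics import AUCMetric
+from src.config import WideDeepConfig
+
+config = WideDeepConfig()
+config.argparse_init()
+
+# Build the network: bare model -> loss wrapper -> one-step training cell,
+# plus a sigmoid head for evaluation.
+net = WideDeepModel(config)
+train_net = TrainStepWrap(NetWithLossClass(net, config))
+eval_net = PredictWithSigmoid(net)
+train_net.set_train()
+
+ds_train = create_dataset(config.data_path, train_mode=True,
+                          epochs=config.epochs, batch_size=config.batch_size)
+ds_eval = create_dataset(config.data_path, train_mode=False,
+                         epochs=config.epochs, batch_size=config.eval_batch_size)
+
+auc_metric = AUCMetric()
+model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
+
+# LossCallBack logs the wide/deep losses; EvalCallBack runs model.eval()
+# each epoch and appends the AUC to config.eval_file_name.
+callbacks = [LossCallBack(config=config),
+             EvalCallBack(model, ds_eval, auc_metric, config),
+             ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path,
+                             config=CheckpointConfig(save_checkpoint_steps=1,
+                                                     keep_checkpoint_max=5))]
+model.train(config.epochs, ds_train, callbacks=callbacks)
+```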
diff --git a/model_zoo/wide_and_deep/src/callbacks.py b/model_zoo/wide_and_deep/src/callbacks.py
index f7f4d81ca3a..1ceefc115a6 100644
--- a/model_zoo/wide_and_deep/src/callbacks.py
+++ b/model_zoo/wide_and_deep/src/callbacks.py
@@ -26,79 +26,79 @@ def add_write(file_path, out_str):
         file_out.write(out_str + "\n")
 
 
-    class LossCallBack(Callback):
+class LossCallBack(Callback):
+    """
+    Monitor the loss in training.
+
+    If the loss is NAN or INF, terminate the training.
+
+    Note:
+        If per_print_times is 0, do NOT print the loss.
+
+    Args:
+        per_print_times (int): Print the loss every per_print_times steps. Default: 1.
+    """
+    def __init__(self, config=None, per_print_times=1):
+        super(LossCallBack, self).__init__()
+        if not isinstance(per_print_times, int) or per_print_times < 0:
+            raise ValueError("per_print_times must be int and >= 0.")
+        self._per_print_times = per_print_times
+        self.config = config
+
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()
+        wide_loss, deep_loss = cb_params.net_outputs[0].asnumpy(), cb_params.net_outputs[1].asnumpy()
+        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
+        cur_num = cb_params.cur_step_num
+        print("===loss===", cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss)
+
+        if self._per_print_times != 0 and cur_num % self._per_print_times == 0 and self.config is not None:
+            loss_file = open(self.config.loss_file_name, "a+")
+            loss_file.write("epoch: %s, step: %s, wide_loss: %s, deep_loss: %s" %
+                            (cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss))
+            loss_file.write("\n")
+            loss_file.close()
+            print("epoch: %s, step: %s, wide_loss: %s, deep_loss: %s" %
+                  (cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss))
+
+
+class EvalCallBack(Callback):
+    """
+    Monitor the loss in evaluation.
+
+    If the loss is NAN or INF, terminate evaluation.
+
+    Note:
+        If print_per_step is 0, do NOT print the loss.
+
+    Args:
+        print_per_step (int): Print the loss every print_per_step steps. Default: 1.
+    """
+    def __init__(self, model, eval_dataset, auc_metric, config, print_per_step=1):
+        super(EvalCallBack, self).__init__()
+        if not isinstance(print_per_step, int) or print_per_step < 0:
+            raise ValueError("print_per_step must be int and >= 0.")
+        self.print_per_step = print_per_step
+        self.model = model
+        self.eval_dataset = eval_dataset
+        self.aucMetric = auc_metric
+        self.aucMetric.clear()
+        self.eval_file_name = config.eval_file_name
+
+    def epoch_end(self, run_context):
         """
-        Monitor the loss in training.
-
-        If the loss is NAN or INF, terminate the training.
-
-        Note:
-            If per_print_times is 0, do NOT print loss.
-
-        Args:
-            per_print_times (int): Print loss every times. Default: 1.
+        Evaluate the model at the end of each epoch.
         """
-        def __init__(self, config, per_print_times=1):
-            super(LossCallBack, self).__init__()
-            if not isinstance(per_print_times, int) or per_print_times < 0:
-                raise ValueError("per_print_times must be in and >= 0.")
-            self._per_print_times = per_print_times
-            self.config = config
+        self.aucMetric.clear()
+        context.set_auto_parallel_context(strategy_ckpt_save_file="",
+                                          strategy_ckpt_load_file="./strategy_train.ckpt")
+        start_time = time.time()
+        out = self.model.eval(self.eval_dataset)
+        end_time = time.time()
+        eval_time = int(end_time - start_time)
 
-        def step_end(self, run_context):
-            cb_params = run_context.original_args()
-            wide_loss, deep_loss = cb_params.net_outputs[0].asnumpy(), cb_params.net_outputs[1].asnumpy()
-            cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
-            cur_num = cb_params.cur_step_num
-            print("===loss===", cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss)
-
-            # raise ValueError
-            if self._per_print_times != 0 and cur_num % self._per_print_times == 0:
-                loss_file = open(self.config.loss_file_name, "a+")
-                loss_file.write("epoch: %s, step: %s, wide_loss: %s, deep_loss: %s" %
-                                (cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss))
-                loss_file.write("\n")
-                loss_file.close()
-                print("epoch: %s, step: %s, wide_loss: %s, deep_loss: %s" %
-                      (cb_params.cur_epoch_num, cur_step_in_epoch, wide_loss, deep_loss))
-
-
-    class EvalCallBack(Callback):
-        """
-        Monitor the loss in evaluating.
-
-        If the loss is NAN or INF, terminate evaluating.
-
-        Note:
-            If per_print_times is 0, do NOT print loss.
-
-        Args:
-            print_per_step (int): Print loss every times. Default: 1.
-        """
-        def __init__(self, model, eval_dataset, auc_metric, config, print_per_step=1):
-            super(EvalCallBack, self).__init__()
-            if not isinstance(print_per_step, int) or print_per_step < 0:
-                raise ValueError("print_per_step must be int and >= 0.")
-            self.print_per_step = print_per_step
-            self.model = model
-            self.eval_dataset = eval_dataset
-            self.aucMetric = auc_metric
-            self.aucMetric.clear()
-            self.eval_file_name = config.eval_file_name
-
-        def epoch_name(self, run_context):
-            """
-            epoch name
-            """
-            self.aucMetric.clear()
-            context.set_auto_parallel_context(strategy_ckpt_save_file="",
-                                              strategy_ckpt_load_file="./strategy_train.ckpt")
-            start_time = time.time()
-            out = self.model.eval(self.eval_dataset)
-            end_time = time.time()
-            eval_time = int(end_time - start_time)
-
-            time_str = time.strftime("%Y-%m-%d %H:%M%S", time.localtime())
-            out_str = "{}==== EvalCallBack model.eval(): {}; eval_time: {}s".format(time_str, out.values(), eval_time)
-            print(out_str)
-            add_write(self.eval_file_name, out_str)
+        time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        out_str = "{}==== EvalCallBack model.eval(): {}; eval_time: {}s".format(time_str, out.values(), eval_time)
+        print(out_str)
+        add_write(self.eval_file_name, out_str)
diff --git a/model_zoo/wide_and_deep/tools/config.py b/model_zoo/wide_and_deep/src/config.py
similarity index 97%
rename from model_zoo/wide_and_deep/tools/config.py
rename to model_zoo/wide_and_deep/src/config.py
index 8d87904be01..707750c97a4 100644
--- a/model_zoo/wide_and_deep/tools/config.py
+++ b/model_zoo/wide_and_deep/src/config.py
@@ -38,9 +38,9 @@ def argparse_init():
     return parser
 
 
-class Config_WideDeep():
+class WideDeepConfig():
     """
-    Config_WideDeep
+    WideDeepConfig
     """
     def __init__(self):
         self.data_path = "./test_raw_data/"
@@ -70,6 +70,7 @@ class Config_WideDeep():
         """
         parser = argparse_init()
         args, _ = parser.parse_known_args()
+        self.data_path = args.data_path
         self.epochs = args.epochs
         self.batch_size = args.batch_size
         self.eval_batch_size = args.eval_batch_size
diff --git a/model_zoo/wide_and_deep/metrics.py b/model_zoo/wide_and_deep/src/metrics.py
similarity index 100%
rename from model_zoo/wide_and_deep/metrics.py
rename to model_zoo/wide_and_deep/src/metrics.py
diff --git a/model_zoo/wide_and_deep/src/wide_and_deep.py b/model_zoo/wide_and_deep/src/wide_and_deep.py
index 4957ce2f490..7772431ab33 100644
--- a/model_zoo/wide_and_deep/src/wide_and_deep.py
+++ b/model_zoo/wide_and_deep/src/wide_and_deep.py
@@ -135,8 +135,8 @@ class WideDeepModel(nn.Cell):
         self.field_size = config.field_size
         self.vocab_size = config.vocab_size
         self.emb_dim = config.emb_dim
-        self.deep_layer_args = config.deep_layer_args
-        self.deep_layer_dims_list, self.deep_layer_act = self.deep_layer_args
+        self.deep_layer_dims_list = config.deep_layer_dim
+        self.deep_layer_act = config.deep_layer_act
         self.init_args = config.init_args
         self.weight_init, self.bias_init = config.weight_bias_init
         self.weight_bias_init = config.weight_bias_init
diff --git a/model_zoo/wide_and_deep/test.py b/model_zoo/wide_and_deep/test.py
index a8eb0cdbc47..54969e7c945 100644
--- a/model_zoo/wide_and_deep/test.py
+++ b/model_zoo/wide_and_deep/test.py
@@ -20,11 +20,11 @@ import os
 
 from mindspore import Model, context
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from wide_deep.models.WideDeep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
-from wide_deep.utils.callbacks import LossCallBack, EvalCallBack
-from wide_deep.data.datasets import create_dataset
-from wide_deep.utils.metrics import AUCMetric
-from tools.config import Config_WideDeep
+from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
+from src.callbacks import LossCallBack, EvalCallBack
+from src.datasets import create_dataset
+from src.metrics import AUCMetric
+from src.config import WideDeepConfig
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Davinci", save_graphs=True)
 
@@ -88,7 +88,7 @@ def test_eval(config):
 
 
 if __name__ == "__main__":
-    widedeep_config = Config_WideDeep()
+    widedeep_config = WideDeepConfig()
     widedeep_config.argparse_init()
 
     test_eval(widedeep_config.widedeep)
diff --git a/model_zoo/wide_and_deep/train.py b/model_zoo/wide_and_deep/train.py
index 4b73c1d4f7d..b3996e01cb5 100644
--- a/model_zoo/wide_and_deep/train.py
+++ b/model_zoo/wide_and_deep/train.py
@@ -16,19 +16,19 @@ import os
 
 from mindspore import Model, context
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
-from wide_deep.models.WideDeep import PredictWithSigmoid, TrainStepWarp, NetWithLossClass, WideDeepModel
-from wide_deep.utils.callbacks import LossCallBack
-from wide_deep.data.datasets import create_dataset
-from tools.config import Config_WideDeep
+from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
+from src.callbacks import LossCallBack
+from src.datasets import create_dataset
+from src.config import WideDeepConfig
 
 
-context.set_context(model=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
 
 
 def get_WideDeep_net(configure):
     WideDeep_net = WideDeepModel(configure)
     loss_net = NetWithLossClass(WideDeep_net, configure)
-    train_net = TrainStepWarp(loss_net)
+    train_net = TrainStepWrap(loss_net)
     eval_net = PredictWithSigmoid(WideDeep_net)
     return train_net, eval_net
 
@@ -71,7 +71,7 @@ def test_train(configure):
     train_net.set_train()
 
     model = Model(train_net)
-    callback = LossCallBack(configure)
+    callback = LossCallBack(config=configure)
     ckptconfig = CheckpointConfig(save_checkpoint_steps=1, keep_checkpoint_max=5)
     ckpoint_cb = ModelCheckpoint(prefix='widedeep_train',
                                  directory=configure.ckpt_path, config=ckptconfig)
@@ -79,7 +79,7 @@ def test_train(configure):
 
 
 if __name__ == "__main__":
-    config = Config_WideDeep()
+    config = WideDeepConfig()
     config.argparse_init()
 
     test_train(config)
diff --git a/model_zoo/wide_and_deep/tools/train_and_test.py b/model_zoo/wide_and_deep/train_and_test.py
similarity index 87%
rename from model_zoo/wide_and_deep/tools/train_and_test.py
rename to model_zoo/wide_and_deep/train_and_test.py
index 9f08377c75e..5d9832b84bc 100644
--- a/model_zoo/wide_and_deep/tools/train_and_test.py
+++ b/model_zoo/wide_and_deep/train_and_test.py
@@ -17,11 +17,11 @@ import os
 
 from mindspore import Model, context
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
-from wide_deep.models.WideDeep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
-from wide_deep.utils.callbacks import LossCallBack, EvalCallBack
-from wide_deep.data.datasets import create_dataset
-from wide_deep.utils.metrics import AUCMetric
-from tools.config import Config_WideDeep
+from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
+from src.callbacks import LossCallBack, EvalCallBack
+from src.datasets import create_dataset
+from src.metrics import AUCMetric
+from src.config import WideDeepConfig
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Davinci")
 
@@ -81,7 +81,7 @@ def test_train_eval(config):
 
     eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
 
-    callback = LossCallBack()
+    callback = LossCallBack(config=config)
     ckptconfig = CheckpointConfig(save_checkpoint_steps=1, keep_checkpoint_max=5)
     ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path, config=ckptconfig)
@@ -91,7 +91,7 @@ def test_train_eval(config):
 
 
 if __name__ == "__main__":
-    wide_deep_config = Config_WideDeep()
+    wide_deep_config = WideDeepConfig()
     wide_deep_config.argparse_init()
 
     test_train_eval(wide_deep_config)
diff --git a/model_zoo/wide_and_deep/train_and_test_multinpu.py b/model_zoo/wide_and_deep/train_and_test_multinpu.py
index 8c7686988cf..9e4b99f5462 100644
--- a/model_zoo/wide_and_deep/train_and_test_multinpu.py
+++ b/model_zoo/wide_and_deep/train_and_test_multinpu.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""train_imagenet."""
+"""train_multinpu."""
 
 
 import os
@@ -27,7 +27,7 @@ from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClas
 from src.callbacks import LossCallBack, EvalCallBack
 from src.datasets import create_dataset
 from src.metrics import AUCMetric
-from src.config import Config_WideDeep
+from src.config import WideDeepConfig
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 context.set_context(mode=GRAPH_MODE, device_target="Davinci", save_graph=True)
@@ -71,7 +71,7 @@ def test_train_eval():
     """
     test_train_eval
     """
     np.random.seed(1000)
-    config = Config_WideDeep
+    config = WideDeepConfig()
     data_path = Config.data_path
     batch_size = config.batch_size
     epochs = config.epochs
@@ -93,7 +93,7 @@ def test_train_eval():
 
     eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
 
-    callback = LossCallBack(config)
+    callback = LossCallBack(config=config)
     ckptconfig = CheckpointConfig(save_checkpoint_steps=1, keep_checkpoint_max=5)
     ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path, config=ckptconfig)