exception, coverage for autohf (#106)

* increase coverage

* fixing exception messages

* fixing import
Xueqing Liu 2021-06-14 17:11:40 -04:00 committed by GitHub
parent c26720c299
commit 926589bdda
27 changed files with 1066 additions and 1500 deletions

.gitignore (vendored): 1 addition
View File

@ -155,3 +155,4 @@ logs
automl.pkl
.idea/*
.DS_Store

View File

@ -5,7 +5,7 @@ from flaml.nlp.autotransformers import AutoTransformers
autohf = AutoTransformers()
preparedata_setting = {
"dataset_subdataset_name": "glue:rte",
"dataset_subdataset_name": "glue:mrpc",
"pretrained_model_size": "electra-base-discriminator:base",
"data_root_path": "data/",
"max_seq_length": 128,

View File

@ -1,2 +1,3 @@
from flaml.nlp.autotransformers import AutoTransformers
from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID
from .hpo.hpo_searchspace import AutoHPOSearchSpace
from .autotransformers import AutoTransformers
from .result_analysis.azure_utils import AzureUtils, JobID
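For orientation, a minimal usage sketch of the package-level imports re-exported above (assuming flaml is installed with the nlp extra, pip install flaml[nlp]; this sketch is illustrative, not part of the commit):

    from flaml.nlp import AutoTransformers, AzureUtils, JobID

    autohf = AutoTransformers()   # AutoML wrapper around huggingface transformers
    jobid_config = JobID()        # default job profile (dataset, model, search space, ...)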

View File

@ -1,41 +1,21 @@
import json
import os
import torch
import transformers
import wandb
from .dataset.dataprocess_auto import AutoEncodeText
import numpy as np
from ray.tune import CLIReporter
import time
import ray
import datasets
from datasets import load_dataset
from transformers.trainer_utils import IntervalStrategy, HPSearchBackend
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments
from .dataset.metric_auto import get_default_and_alternative_metric
from .dataset.submission_auto import auto_output_prediction
from .dataset.task_auto import get_default_task
from .hpo.grid_searchspace_auto import AutoGridSearchSpace
from .hpo.hpo_searchspace import AutoHPOSearchSpace
from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
from .utils import PathUtils, _variable_override_default_alternative
from .hpo.searchalgo_auto import AutoSearchAlgorithm
from .hpo.scheduler_auto import AutoScheduler
from .result_analysis.wandb_utils import WandbUtils
from .result_analysis.azure_utils import JobID
from .utils import load_console_args
from .huggingface.trainer import TrainerForAutoTransformers
import logging
transformers.logging.set_verbosity_error()
try:
import ray
from transformers import TrainingArguments
import datasets
import torch
except ImportError:
print("To use the nlp component in flaml, run pip install flaml[nlp]")
from .dataset.task_auto import get_default_task
from .result_analysis.azure_utils import JobID
from .huggingface.trainer import TrainerForAutoTransformers
logger = logging.getLogger(__name__)
logger_formatter = logging.Formatter(
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
@ -99,55 +79,25 @@ class AutoTransformers:
def _set_search_space(self,
**custom_hpo_args):
search_space_dict_hpo = search_space_dict_grid = None
if self.jobid_config.mod == "grid":
search_space_grid_json = AutoGridSearchSpace.from_model_and_dataset_name(self.jobid_config.pre,
self.jobid_config.presz,
self.get_full_data_name(),
self.jobid_config.subdat, "grid")
search_space_dict_grid \
= AutoTransformers._convert_dict_to_ray_tune_space(search_space_grid_json, mode="grid")
search_space_dict_hpo = search_space_dict_grid
if self.jobid_config.mod != "grid" and self.jobid_config.mod != "gridbert":
search_space_hpo_json \
= AutoHPOSearchSpace.from_model_and_dataset_name(logger,
self.jobid_config.spa,
self.jobid_config.pre,
self.jobid_config.presz,
self.get_full_data_name(),
self.jobid_config.subdat,
**custom_hpo_args)
search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="hpo")
elif self.jobid_config.mod == "gridbert":
search_space_hpo_json = AutoGridSearchSpace.from_model_and_dataset_name(
"bert",
"base",
self.get_full_data_name(),
self.jobid_config.subdat, "grid")
search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="grid")
from .hpo.hpo_searchspace import AutoHPOSearchSpace
"""
resolve the conflict in search_space_dict_hpo: only one of "max_steps" and "num_train_epochs" can exist
in the search space. If both exist, num_train_epochs is removed. Similarly, if "warmup_steps" and
"warmup_ratio" both exist, warmup_ratio is removed
"""
search_space_dict_hpo = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_hpo)
self._search_space_hpo = search_space_dict_hpo
if self.jobid_config.mod == "grid":
search_space_dict_grid = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_grid)
self._search_space_grid = search_space_dict_grid
else:
self._search_space_grid = None
search_space_hpo_json \
= AutoHPOSearchSpace.from_model_and_dataset_name(self.jobid_config.spa,
self.jobid_config.pre,
self.jobid_config.presz,
self.jobid_config.dat,
self.jobid_config.subdat,
**custom_hpo_args)
self._search_space_hpo = AutoTransformers._convert_dict_to_ray_tune_space(
search_space_hpo_json,
mode=self.jobid_config.mod)
try:
self.ds_config = custom_hpo_args["ds_config"]
except KeyError:
self.ds_config = None
def _wrapper(self, func, *args): # with star
@staticmethod
def _wrapper(func, *args): # with star
return func(*args)
def _get_split_name(self, data_raw, fold_name=None):
@staticmethod
def _get_split_name(data_raw, fold_name=None):
if fold_name:
return fold_name
fold_keys = data_raw.keys()
@ -157,7 +107,7 @@ class AutoTransformers:
for each_split_name in {"train", "validation", "test"}:
assert not (each_key.startswith(each_split_name) and each_key != each_split_name), \
"Dataset split must be within {}, must be explicitly specified in dataset_config, e.g.," \
"'fold_name': ['train', 'validation_matched', 'test_matched']. Please refer to the example in the " \
"'fold_name': ['train','validation_matched','test_matched']. Please refer to the example in the " \
"documentation of AutoTransformers.prepare_data()".format(",".join(fold_keys))
return "train", "validation", "test"
@ -187,28 +137,47 @@ class AutoTransformers:
Args:
server_name:
a string variable, which can be tmdev or azureml
A string variable, which can be tmdev or azureml
data_root_path:
the root path for storing the checkpoints and output results, e.g., "data/"
The root path for storing the checkpoints and output results, e.g., "data/"
jobid_config:
a JobID object describing the profile of job
A JobID object describing the profile of job
wandb_utils:
a WandbUtils object for wandb operations
A WandbUtils object for wandb operations
max_seq_length (optional):
max_seq_length for the huggingface model, this hyperparameter must be specified
The max_seq_length for the huggingface model, this hyperparameter must be specified
at the data processing step
resplit_portion:
the proportion for resplitting the train and dev data when split_mode="resplit".
The proportion for resplitting the train and dev data when split_mode="resplit".
If args.resplit_mode = "rspt", resplit_portion is required
is_wandb_on:
A boolean variable indicating whether wandb is used
'''
console_args = load_console_args(**custom_data_args)
from .dataset.dataprocess_auto import AutoEncodeText
from transformers import AutoTokenizer
from datasets import load_dataset
from .utils import PathUtils
from .utils import load_dft_args
self._max_seq_length = max_seq_length
self._server_name = server_name if server_name is not None else "tmdev"
self.jobid_config = jobid_config if jobid_config is not None else JobID(console_args)
self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
console_args=console_args,
jobid_config=self.jobid_config)
self.wandb_utils.set_wandb_per_run()
"""
loading the jobid config from console args
"""
console_args = load_dft_args()
self.jobid_config = JobID(console_args)
if jobid_config:
self.jobid_config = jobid_config
if len(custom_data_args) > 0:
self.jobid_config.set_jobid_from_console_args(console_args=custom_data_args)
if is_wandb_on:
from .result_analysis.wandb_utils import WandbUtils
self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
console_args=console_args,
jobid_config=self.jobid_config)
self.wandb_utils.set_wandb_per_run()
else:
self.wandb_utils = None
self.path_utils = PathUtils(self.jobid_config, hpo_data_root_path=data_root_path)
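Based on the argument list documented above and the test file near the top of this diff, a hedged sketch of a prepare_data call (keyword names mirror the test; the model string is only illustrative, and any keyword not listed above is assumed to be forwarded into the JobID configuration):

    preparedata_setting = {
        "dataset_subdataset_name": "glue:mrpc",
        "pretrained_model_size": "electra-base-discriminator:base",
        "data_root_path": "data/",
        "max_seq_length": 128,
    }
    autohf.prepare_data(**preparedata_setting)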
@ -216,11 +185,14 @@ class AutoTransformers:
assert resplit_portion, "If split mode is 'rspt', the resplit_portion must be provided. Please " \
"refer to the example in the documentation of AutoTransformers.prepare_data()"
if self.jobid_config.subdat:
data_raw = load_dataset(self.get_full_data_name(), self.jobid_config.subdat)
data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat),
self.jobid_config.subdat)
else:
data_raw = self._wrapper(load_dataset, *self.jobid_config.dat)
data_raw = AutoTransformers._wrapper(load_dataset, *self.jobid_config.dat)
self._train_name, self._dev_name, self._test_name = self._get_split_name(data_raw, fold_name=fold_name)
self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name(
data_raw,
fold_name=fold_name)
auto_tokentoids_config = {"max_seq_length": self._max_seq_length}
self._tokenizer = AutoTokenizer.from_pretrained(self.jobid_config.pre_full, use_fast=True)
@ -228,7 +200,7 @@ class AutoTransformers:
return AutoEncodeText.from_model_and_dataset_name(
data_raw,
self.jobid_config.pre_full,
self.get_full_data_name(),
self.jobid_config.dat,
self.jobid_config.subdat,
**auto_tokentoids_config)
@ -247,7 +219,8 @@ class AutoTransformers:
if self.jobid_config.spt == "rspt":
all_folds_from_source = []
assert "source" in resplit_portion.keys(), "Must specify the source for resplitting the dataset in" \
"resplit_portion, which is a list of folder names, e.g., resplit_portion = {'source': ['train']}"
"resplit_portion, which is a list of folder names, e.g., " \
"resplit_portion = {'source': ['train']}"
source_fold_names = resplit_portion['source']
for each_fold_name in source_fold_names:
@ -279,8 +252,11 @@ class AutoTransformers:
def _load_model(self,
checkpoint_path=None,
per_model_config=None):
from transformers import AutoConfig
from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
this_task = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
this_task = get_default_task(self.jobid_config.dat,
self.jobid_config.subdat)
if this_task == "seq-classification":
self._num_labels = len(self.train_dataset.features["label"].names)
elif this_task == "regression":
@ -290,6 +266,7 @@ class AutoTransformers:
checkpoint_path = self.jobid_config.pre_full
def get_this_model():
from transformers import AutoModelForSequenceClassification
return AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=model_config)
def is_pretrained_model_in_classification_head_list():
@ -331,15 +308,17 @@ class AutoTransformers:
this_model.resize_token_embeddings(len(self._tokenizer))
return this_model
elif this_task == "regression":
model_config = self._set_model_config(checkpoint_path, per_model_config, 1)
model_config_num_labels = 1
model_config = _set_model_config()
this_model = get_this_model()
return this_model
def _get_metric_func(self):
if self.get_full_data_name() in ("glue", "super_glue"):
metric = datasets.load.load_metric(self.get_full_data_name(), self.jobid_config.subdat)
elif self.get_full_data_name() in ("squad", "squad_v2"):
metric = datasets.load.load_metric(self.get_full_data_name())
data_name = JobID.dataset_list_to_str(self.jobid_config.dat)
if data_name in ("glue", "super_glue"):
metric = datasets.load.load_metric(data_name, self.jobid_config.subdat)
elif data_name in ("squad", "squad_v2"):
metric = datasets.load.load_metric(data_name)
else:
metric = datasets.load.load_metric(self.metric_name)
return metric
@ -366,6 +345,7 @@ class AutoTransformers:
@staticmethod
def _separate_config(config):
training_args_config = {}
per_model_config = {}
@ -378,10 +358,12 @@ class AutoTransformers:
return training_args_config, per_model_config
def _objective(self, config, reporter, checkpoint_dir=None):
def model_init():
return self._load_model()
from transformers import IntervalStrategy
from transformers.trainer_utils import set_seed
def model_init():
return self._load_model()
set_seed(config["seed"])
training_args_config, per_model_config = AutoTransformers._separate_config(config)
@ -404,7 +386,6 @@ class AutoTransformers:
save_steps=ckpt_freq,
save_total_limit=0,
fp16=self._fp16,
deepspeed=self.ds_config,
**training_args_config,
)
@ -423,10 +404,15 @@ class AutoTransformers:
"""
create a wandb run. If os.environ["WANDB_MODE"] == "offline", run = None
"""
run = self.wandb_utils.set_wandb_per_trial()
if os.environ["WANDB_MODE"] == "online":
if self.wandb_utils:
run = self.wandb_utils.set_wandb_per_trial()
import wandb
for each_hp in config:
wandb.log({each_hp: config[each_hp]})
else:
run = None
trainer.train()
trainer.evaluate(self.eval_dataset)
"""
@ -466,6 +452,8 @@ class AutoTransformers:
search_algo_name,
search_algo_args_mode,
**custom_hpo_args):
from .hpo.searchalgo_auto import AutoSearchAlgorithm
if search_algo_name in ("bs", "cfo"):
self._verify_init_config(**custom_hpo_args)
search_algo = AutoSearchAlgorithm.from_method_name(
@ -488,9 +476,6 @@ class AutoTransformers:
assert len(subdirs) == 1, subdirs
return subdirs[0]
def get_full_data_name(self):
return JobID.dataset_list_to_str(self.jobid_config.dat, "dat")
def _save_ckpt_json(self,
best_ckpt):
json.dump({"best_ckpt": best_ckpt},
@ -517,11 +502,15 @@ class AutoTransformers:
raise err
def _set_metric(self, custom_metric_name=None, custom_metric_mode_name=None):
default_metric, default_mode, all_metrics, all_modes = get_default_and_alternative_metric(
self.get_full_data_name(),
subdataset_name=self.jobid_config.subdat,
custom_metric_name=custom_metric_name,
custom_metric_mode_name=custom_metric_mode_name)
from .dataset.metric_auto import get_default_and_alternative_metric
from .utils import _variable_override_default_alternative
default_metric, default_mode, all_metrics, all_modes = \
get_default_and_alternative_metric(
dataset_name_list=self.jobid_config.dat,
subdataset_name=self.jobid_config.subdat,
custom_metric_name=custom_metric_name,
custom_metric_mode_name=custom_metric_mode_name)
_variable_override_default_alternative(logger,
self,
"metric_name",
@ -538,7 +527,8 @@ class AutoTransformers:
self._all_modes = all_modes
def _set_task(self):
self.task_name = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
self.task_name = get_default_task(self.jobid_config.dat,
self.jobid_config.subdat)
def fit_hf(self,
resources_per_trial,
@ -549,47 +539,46 @@ class AutoTransformers:
_fp16=True,
**custom_hpo_args
):
from transformers.trainer_utils import HPSearchBackend
'''Fine-tuning the huggingface model using HF's API Transformers.hyperparameter_search (for comparative purposes).
Transformers.hyperparameter_search has the following disadvantages:
(1) it does not return a tune.analysis.Analysis result, which is needed for analyzing the tuning run
(2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
search space, etc. are defined inside of Transformers.hyperparameter_search.
An example:
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": 1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
validation_metric, analysis = autohf.fit(**autohf_settings,)
Args:
resources_per_trial:
A dict showing the resources used by each trial,
e.g., {"gpu": 4, "cpu": 4}
num_samples:
An int variable of the maximum number of trials
time_budget:
An int variable of the maximum time budget
custom_metric_name:
A string of the dataset name or a function,
e.g., 'accuracy', 'f1', 'loss',
custom_metric_mode_name:
A string of the mode name,
e.g., "max", "min", "last", "all"
fp16:
boolean, default = True | whether to use fp16
custom_hpo_args:
The additional keyword arguments, e.g.,
custom_hpo_args = {"points_to_evaluate": [{
"num_train_epochs": 1,
"per_device_train_batch_size": 128, }]}
Returns:
validation_metric:
a dict storing the validation score
'''
Transformers.hyperparameter_search has the following disadvantages:
(1) it does not return a tune.analysis.Analysis result, which is needed for analyzing the tuning run
(2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
search space, etc. are defined inside of Transformers.hyperparameter_search.
An example:
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": 1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
validation_metric, analysis = autohf.fit(**autohf_settings,)
Args:
resources_per_trial:
A dict showing the resources used by each trial,
e.g., {"gpu": 4, "cpu": 4}
num_samples:
An int variable of the maximum number of trials
time_budget:
An int variable of the maximum time budget
custom_metric_name:
A string of the dataset name or a function,
e.g., 'accuracy', 'f1', 'loss',
custom_metric_mode_name:
A string of the mode name,
e.g., "max", "min", "last", "all"
fp16:
boolean, default = True | whether to use fp16
custom_hpo_args:
The additional keyword arguments, e.g.,
custom_hpo_args = {"points_to_evaluate": [{
"num_train_epochs": 1,
"per_device_train_batch_size": 128, }]}
Returns:
validation_metric:
a dict storing the validation score
'''
def model_init():
return self._load_model()
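A hedged sketch of calling fit_hf with the arguments documented above (assumes prepare_data has already been run; the budget values are illustrative only and mirror the docstring example):

    validation_metric = autohf.fit_hf(
        resources_per_trial={"cpu": 1},
        num_samples=1,
        time_budget=100000,
    )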
@ -626,7 +615,7 @@ class AutoTransformers:
best_run = trainer.hyperparameter_search(
n_trials=num_samples,
time_budget_s=time_budget,
hp_space=ray_hp_space,
# hp_space=ray_hp_space,
backend=HPSearchBackend.RAY,
resources_per_trial=resources_per_trial)
duration = time.time() - start_time
@ -669,7 +658,8 @@ class AutoTransformers:
ckpt_per_epoch=1,
fp16=True,
verbose=1,
resources_per_trial={"gpu": 1, "cpu": 1},
resources_per_trial=None,
ray_local_mode=False,
**custom_hpo_args):
'''Fine-tuning the huggingface model using the hpo setting
@ -703,6 +693,8 @@ class AutoTransformers:
messages
fp16:
boolean, default = True | whether to use fp16
ray_local_mode:
boolean, default = False | whether to use the local mode (debugging mode) for ray tune.run
custom_hpo_args:
The additional keyword arguments, e.g.,
custom_hpo_args = {"points_to_evaluate": [{
@ -716,13 +708,22 @@ class AutoTransformers:
a ray.tune.analysis.Analysis object storing the analysis results from tune.run
'''
from .hpo.scheduler_auto import AutoScheduler
"""
Specify the remaining jobid configs from custom_hpo_args, e.g., if the search algorithm was not specified
previously, it can be specified here
"""
if len(custom_hpo_args) > 0:
self.jobid_config.set_jobid_from_console_args(console_args=custom_hpo_args)
self._resources_per_trial = resources_per_trial
self._set_metric(custom_metric_name, custom_metric_mode_name)
self._set_task()
self._fp16 = fp16
ray.init(local_mode=True)
ray.init(local_mode=ray_local_mode)
self._set_search_space(**custom_hpo_args)
search_algo = self._get_search_algo(self.jobid_config.alg, self.jobid_config.arg, **custom_hpo_args)
scheduler = AutoScheduler.from_scheduler_name(self.jobid_config.pru)
self.ckpt_per_epoch = ckpt_per_epoch
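Putting the docstring example together with the new ray_local_mode flag, a hedged end-to-end sketch of fit (all values illustrative):

    autohf_settings = {
        "resources_per_trial": {"cpu": 1},
        "num_samples": 1,
        "time_budget": 100000,
        "ckpt_per_epoch": 1,
        "fp16": False,
        "ray_local_mode": True,   # new debugging switch for ray tune.run
    }
    validation_metric, analysis = autohf.fit(**autohf_settings)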
@ -802,17 +803,16 @@ class AutoTransformers:
test_trainer = TrainerForAutoTransformers(best_model, training_args)
if self.jobid_config.spt == "ori":
try:
if "label" in self.test_dataset.keys():
self.test_dataset.remove_columns_("label")
except ValueError:
pass
print("Cleaning the existing label column from test data")
test_dataloader = test_trainer.get_test_dataloader(self.test_dataset)
predictions, labels, _ = test_trainer.prediction_loop(test_dataloader, description="Prediction")
predictions = np.squeeze(predictions) \
if get_default_task(self.get_full_data_name(), self.jobid_config.subdat) == "regression" \
if get_default_task(self.jobid_config.dat,
self.jobid_config.subdat) == "regression" \
else np.argmax(predictions, axis=1)
torch.cuda.empty_cache()
if self.jobid_config.spt == "rspt":
assert labels is not None
@ -847,6 +847,11 @@ class AutoTransformers:
Returns:
the path of the output .zip file
"""
return auto_output_prediction(self.get_full_data_name(), output_prediction_path,
output_zip_file_name, predictions, self.train_dataset,
self._dev_name, self.jobid_config.subdat)
from .dataset.submission_auto import auto_output_prediction
return auto_output_prediction(self.jobid_config.dat,
output_prediction_path,
output_zip_file_name,
predictions,
self.train_dataset,
self._dev_name,
self.jobid_config.subdat)

View File

@ -178,7 +178,7 @@ class AutoEncodeText:
def from_model_and_dataset_name(cls,
data_raw,
model_checkpoint_path,
dataset_name,
dataset_name_list: list = None,
subdataset_name=None,
**kwargs):
"""
@ -193,8 +193,8 @@ class AutoEncodeText:
model_checkpoint_path:
A string variable which specifies the model path, e.g., "google/electra-base-discriminator"
dataset_name:
A string variable which is the dataset name, e.g., "glue"
dataset_name_list:
A list which is the dataset name, e.g., ["glue"]
subdataset_name:
A string variable which is the sub dataset name,e.g., "rte"
@ -208,6 +208,8 @@ class AutoEncodeText:
>>> AutoEncodeText.from_model_and_dataset_name(data_raw, "google/electra-base-discriminator", ["glue"], "rte")
"""
from ..result_analysis.azure_utils import JobID
dataset_name = JobID.dataset_list_to_str(dataset_name_list)
if (dataset_name, subdataset_name) in TOKENIZER_MAPPING.keys():
this_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path, use_fast=True)
token_func = TOKENIZER_MAPPING[(dataset_name, subdataset_name)]
@ -220,6 +222,6 @@ class AutoEncodeText:
raise ValueError(
"Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n"
"Method name should be one of {}.".format(
dataset_name, subdataset_name, cls.__name__, ", ".join(c.__name__ for c in TOKENIZER_MAPPING.keys())
dataset_name, subdataset_name, cls.__name__, ", ".join(c[0] for c in TOKENIZER_MAPPING.keys())
)
)

View File

@ -1,5 +1,6 @@
# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py
from collections import OrderedDict
import typing
metric_mode_mapping_glue = {
"cola": [("matthews_correlation", "max")],
@ -42,10 +43,12 @@ METRIC_MAPPING = OrderedDict(
)
def get_default_and_alternative_metric(dataset_name,
def get_default_and_alternative_metric(dataset_name_list: typing.List,
subdataset_name=None,
custom_metric_name=None,
custom_metric_mode_name=None):
from ..result_analysis.azure_utils import JobID
dataset_name = JobID.dataset_list_to_str(dataset_name_list)
if dataset_name not in METRIC_MAPPING.keys():
assert custom_metric_name and custom_metric_mode_name, \
"The dataset is not in {}, you must explicitly specify " \

View File

@ -42,12 +42,13 @@ test_size_glue = {
}
def output_prediction_glue(output_path, output_dir_name, predictions, train_data, dev_name, subdataset_name):
output_dir = os.path.join(output_path, output_dir_name)
def output_prediction_glue(output_path, zip_file_name, predictions, train_data, dev_name, subdataset_name):
output_dir = os.path.join(output_path, zip_file_name)
if os.path.exists(output_dir):
assert os.path.isdir(output_dir)
else:
os.mkdir(output_dir)
import pathlib
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
if subdataset_name != "stsb":
label_list = train_data.features["label"].names
@ -81,8 +82,8 @@ def output_prediction_glue(output_path, output_dir_name, predictions, train_data
else:
writer.write(f"{index}\t{item:3.3f}\n")
shutil.make_archive(os.path.join(output_path, output_dir_name), 'zip', output_dir)
return os.path.join(output_path, output_dir_name + ".zip")
shutil.make_archive(os.path.join(output_path, zip_file_name), 'zip', output_dir)
return os.path.join(output_path, zip_file_name + ".zip")
OUTPUT_PREDICTION_MAPPING = OrderedDict(
@ -92,16 +93,18 @@ OUTPUT_PREDICTION_MAPPING = OrderedDict(
)
def auto_output_prediction(dataset_name,
def auto_output_prediction(dataset_name_list: list,
output_path,
output_dir_name,
zip_file_name,
predictions,
train_data,
dev_name,
subset_name):
from ..result_analysis.azure_utils import JobID
dataset_name = JobID.dataset_list_to_str(dataset_name_list)
if dataset_name in OUTPUT_PREDICTION_MAPPING.keys():
return OUTPUT_PREDICTION_MAPPING[dataset_name](output_path,
output_dir_name,
zip_file_name,
predictions,
train_data,
dev_name,

View File

@ -30,7 +30,9 @@ TASK_MAPPING = OrderedDict(
)
def get_default_task(dataset_name, subdataset_name=None):
def get_default_task(dataset_name_list: list, subdataset_name=None):
from ..result_analysis.azure_utils import JobID
dataset_name = JobID.dataset_list_to_str(dataset_name_list)
assert dataset_name in TASK_MAPPING.keys(), "The dataset is not in {}, you must explicitly specify " \
"the custom_metric_name and custom_metric_mode_name".format(
",".join(TASK_MAPPING.keys()))

View File

@ -1,8 +1,9 @@
# lookup table for the grid configs of each pre-trained language model for different tasks
import copy
def get_space_union_and_unique(search_space_common, search_space_unique, this_case_tags: list):
def get_space_union_and_unique(search_space_common,
search_space_unique,
this_case_tags: list):
"""
get the recommended search configs for each pre-trained language models
@ -37,7 +38,7 @@ def get_space_union_and_unique(search_space_common, search_space_unique, this_ca
def get_deberta_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -64,18 +65,17 @@ def get_deberta_space(model_size_type=None,
def get_longformer_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
TODO: Longformer: The Long-Document Transformer
"""
if dataset_name == "glue":
return
return
def get_funnel_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -154,11 +154,13 @@ def get_funnel_space(model_size_type=None,
}
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name)])
[JobID.get_full_data_name(
dataset_name_list,
subdataset_name)])
def get_bert_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -203,11 +205,13 @@ def get_bert_space(model_size_type=None,
"num_train_epochs": [2, 3, 4],
}
}
return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])
return get_space_union_and_unique(search_space_common,
search_space_unique,
dataset_name_list)
def get_roberta_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
# RoBERTa: A Robustly Optimized BERT Pretraining Approach
@ -241,11 +245,13 @@ def get_roberta_space(model_size_type=None,
"num_train_epochs": [2],
}
}
return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])
return get_space_union_and_unique(search_space_common,
search_space_unique,
dataset_name_list)
def get_electra_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -255,8 +261,7 @@ def get_electra_space(model_size_type=None,
assert model_size_type in ("small", "base", "large", "intermediate", "xlarge"), \
"Electra paper has only provided hyperparameter for the small and base huggingface"
search_space_common = {
"learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4] if algo_mode == "grid"
else [3e-5, 5e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-3],
"learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4],
"weight_decay": [0.0],
"adam_epsilon": [1e-6],
"warmup_ratio": [0.1],
@ -282,7 +287,7 @@ def get_electra_space(model_size_type=None,
"num_train_epochs": [3],
},
"glue_mrpc": {
"num_train_epochs": [3],
"num_train_epochs": [0.2],
},
"glue_cola": {
"num_train_epochs": [3],
@ -302,11 +307,13 @@ def get_electra_space(model_size_type=None,
}
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name), model_size_type])
[JobID.get_full_data_name(
dataset_name_list,
subdataset_name), model_size_type])
def get_mobilebert_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -326,7 +333,7 @@ def get_mobilebert_space(model_size_type=None,
def get_albert_space(model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -453,4 +460,6 @@ def get_albert_space(model_size_type=None,
# rates ((1-10) * e-5), and the number of epochs (2-10)
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name)])
[JobID.get_full_data_name(
dataset_name_list,
subdataset_name)])

View File

@ -6,7 +6,9 @@ from .get_grid_search_space import \
get_roberta_space,
get_funnel_space,
get_deberta_space,
get_albert_space
get_albert_space,
get_longformer_space,
get_mobilebert_space
)
GRID_SEARCH_SPACE_MAPPING = OrderedDict(
@ -17,6 +19,8 @@ GRID_SEARCH_SPACE_MAPPING = OrderedDict(
("funnel", get_funnel_space),
("deberta", get_deberta_space),
("albert", get_albert_space),
("mobilebert", get_mobilebert_space),
("longformer", get_longformer_space)
]
)
@ -53,7 +57,7 @@ class AutoGridSearchSpace:
def from_model_and_dataset_name(cls,
model_type,
model_size_type,
dataset_name,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None):
"""
@ -67,7 +71,7 @@ class AutoGridSearchSpace:
model_size_type:
A string variable which is the size of the model, e.g., "small"
dataset_name:
dataset_name_list:
A string variable which is the dataset name, e.g., "glue"
subdataset_name:
@ -77,17 +81,17 @@ class AutoGridSearchSpace:
A string variable which is the algorithm mode for grid search, e.g., "gridbert"
Example:
>>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", "glue", "rte", "grid")
>>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", ["glue"], "rte", "grid")
"""
if model_type in GRID_SEARCH_SPACE_MAPPING.keys():
this_model_recommended_space = GRID_SEARCH_SPACE_MAPPING[model_type](
model_size_type, dataset_name, subdataset_name, algo_mode)
model_size_type, dataset_name_list, subdataset_name, algo_mode)
return this_model_recommended_space
raise ValueError(
"Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n"
"Method name should be one of {}.".format(
model_type, dataset_name, cls.__name__, ", ".join(c.__name__ for c in GRID_SEARCH_SPACE_MAPPING.keys())
model_type, dataset_name_list, cls.__name__, ", ".join(GRID_SEARCH_SPACE_MAPPING.keys())
)
)
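A hedged sketch of the updated interface, mirroring the docstring example but with the new list-valued dataset name:

    grid_space = AutoGridSearchSpace.from_model_and_dataset_name(
        "electra", "small", ["glue"], "rte", "grid")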

View File

@ -1,29 +1,32 @@
from collections import OrderedDict
from ..huggingface.trainer import TrainerForAutoTransformers
from ray import tune
from transformers import TrainingArguments
from .grid_searchspace_auto import AutoGridSearchSpace
def hpo_space_custom(**custom_hpo_args):
def hpo_space_custom(model_type=None,
model_size_type=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
"""
The 5 arguments here cannot be deleted; they need to be kept consistent with
other functions in HPO_SEARCH_SPACE_MAPPING
"""
assert "hpo_space" in custom_hpo_args
custom_search_space = custom_hpo_args["hpo_space"]
return custom_search_space
def bounded_gridunion(logger=None,
model_type=None,
def bounded_gridunion(model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
assert "bound" in custom_hpo_args
gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](logger,
model_type,
gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](model_type,
model_size_type,
dataset_name,
dataset_name_list,
subdataset_name,
**custom_hpo_args)
for each_key in custom_hpo_args["bound"].keys():
@ -50,29 +53,30 @@ def bounded_gridunion(logger=None,
return gridunion_space
def hpo_space_gridunion(logger=None,
model_type=None,
def hpo_space_gridunion(model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
output_config = {}
for each_model_type in {"electra", "roberta", "bert"}:
for each_model_type in ["bert", "roberta", "electra"]:
# if each_model_type == model_type: continue
this_config = AutoGridSearchSpace.from_model_and_dataset_name(
each_model_type, model_size_type, dataset_name, subdataset_name, "hpo")
each_model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
from ..utils import merge_dicts
output_config = merge_dicts(output_config, this_config)
default_values = {}
"""
adding the default configuration from transformers/training_args.py into hpo space
"""
from transformers import TrainingArguments
training_args = TrainingArguments(output_dir=".")
for each_hp in output_config.keys():
try:
default_values[each_hp] = [getattr(training_args, each_hp)]
except AttributeError:
pass
print("training args do not contain {}, passed".format(each_hp))
output_config = merge_dicts(output_config, default_values)
@ -80,27 +84,26 @@ def hpo_space_gridunion(logger=None,
def hpo_space_gridunion_smoke_test(
logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
return {'learning_rate': [1e-5],
'weight_decay': [0.0],
'adam_epsilon': [1e-08],
'warmup_ratio': [0.1],
'per_device_train_batch_size': [2],
'hidden_dropout_prob': [0.1],
'attention_probs_dropout_prob': [0.1],
'num_train_epochs': [0.1]}
return {
"learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
"num_train_epochs": [0.01],
"per_device_train_batch_size": [2],
"warmup_ratio": {"l": 0.0, "u": 0.3, "space": "linear"},
"weight_decay": {"l": 0.0, "u": 0.3, "space": "linear"}
}
def hpo_space_generic(logger=None,
model_type=None,
def hpo_space_generic(model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
output_config = {
"learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
@ -112,11 +115,11 @@ def hpo_space_generic(logger=None,
return output_config
def hpo_space_generic_grid(logger=None,
model_type=None,
def hpo_space_generic_grid(model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
output_config = {
"learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
@ -128,14 +131,14 @@ def hpo_space_generic_grid(logger=None,
return output_config
def hpo_space_small(logger=None,
model_type=None,
def hpo_space_small(model_type=None,
model_size_type=None,
dataset_name=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
config_json = AutoGridSearchSpace.from_model_and_dataset_name(
model_type, model_size_type, dataset_name, subdataset_name, "hpo")
model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
output_config = {}
for each_hp in config_json.keys():
@ -158,13 +161,28 @@ def hpo_space_small(logger=None,
return output_config
def hpo_space_grid(model_type=None,
model_size_type=None,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
return AutoGridSearchSpace.from_model_and_dataset_name(model_type,
model_size_type,
dataset_name_list,
subdataset_name,
algo_mode
)
HPO_SEARCH_SPACE_MAPPING = OrderedDict(
[
("grid", hpo_space_grid),
("uni", hpo_space_gridunion),
("gnr", hpo_space_generic),
("uni_test", hpo_space_gridunion_smoke_test),
("cus", hpo_space_custom),
("buni", bounded_gridunion)
("buni", bounded_gridunion),
]
)
@ -181,29 +199,33 @@ class AutoHPOSearchSpace:
def __init__(self):
raise EnvironmentError(
"AutoHPOSearchSpace is designed to be instantiated "
"using the `AutoHPOSearchSpace.from_config_and_method_name(cls, logger,hpo_searchspace_name,"
"model_type,model_size_type,dataset_name,subdataset_name = None,**custom_hpo_args)` methods."
"using the `AutoHPOSearchSpace.from_config_and_method_name(cls, hpo_searchspace_name,"
"model_type,model_size_type,dataset_name,subdataset_name=None,**custom_hpo_args)` methods."
)
@classmethod
def from_model_and_dataset_name(cls,
logger,
hpo_searchspace_mode,
model_type,
model_size_type,
dataset_name,
dataset_name_list: list = None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
"""
Instantiate one of the classes for getting the hpo search space from the search space name, model type,
model size type, dataset name and sub dataset name
Args:
logger:
Reference to the logger
hpo_searchspace_mode:
A string variable which is name of the hpo search space, e.g., "uni"
A string variable which is the mode of the hpo search space; it must be one of the following options:
- uni: the union of BERT, RoBERTa and Electra's grid configs
- grid: the recommended grid config of the LM specified in jobconfig.pre
- gnr: the generic continuous search space
- uni_test: the search space for smoke test
- cus: user customized search space, specified in the "hpo_space" argument in AutoTransformers.fit
- buni: bounded grid union search space
model_type:
A string variable which is the type of the model, e.g., "electra"
@ -221,22 +243,22 @@ class AutoHPOSearchSpace:
Any additional keyword argument to be used for the function for the HPO search space
Example:
>>> AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
>>> AutoHPOSearchSpace.from_model_and_dataset_name("uni", "electra", "small", ["glue"], "rte", "hpo")
"""
if hpo_searchspace_mode in HPO_SEARCH_SPACE_MAPPING.keys():
hpo_space = HPO_SEARCH_SPACE_MAPPING[hpo_searchspace_mode](
logger,
model_type,
model_size_type,
dataset_name,
dataset_name_list,
subdataset_name,
algo_mode,
**custom_hpo_args)
return hpo_space
raise ValueError(
"Unrecognized method {},{} for this kind of AutoHPOSearchSpace: {}.\n"
"Method name should be one of {}.".format(
hpo_searchspace_mode, dataset_name, cls.__name__,
", ".join(c.__name__ for c in HPO_SEARCH_SPACE_MAPPING.keys())
hpo_searchspace_mode, dataset_name_list, cls.__name__,
", ".join(HPO_SEARCH_SPACE_MAPPING.keys())
)
)
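A hedged sketch of the updated AutoHPOSearchSpace interface, following the docstring example above ("uni" builds the union of the BERT, RoBERTa and Electra grid configs):

    hpo_space = AutoHPOSearchSpace.from_model_and_dataset_name(
        "uni", "electra", "small", ["glue"], "rte", "hpo")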

View File

@ -1,5 +1,5 @@
from collections import OrderedDict
from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler, MedianStoppingRule
from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler
SCHEDULER_MAPPING = OrderedDict(
[
@ -37,15 +37,13 @@ class AutoScheduler:
Example:
>>> AutoScheduler.from_scheduler_name("asha")
"""
if scheduler_name in SCHEDULER_MAPPING.keys():
try:
return SCHEDULER_MAPPING[scheduler_name](**kwargs)
except TypeError:
if SCHEDULER_MAPPING[scheduler_name] is None:
return None
return SCHEDULER_MAPPING[scheduler_name](**kwargs)
raise ValueError(
"Unrecognized scheduler {} for this kind of AutoScheduler: {}.\n"
"Scheduler name should be one of {}.".format(
scheduler_name, cls.__name__, ", ".join(c.__name__ for c in SCHEDULER_MAPPING.keys())
scheduler_name, cls.__name__, ", ".join(SCHEDULER_MAPPING.keys())
)
)

View File

@ -3,6 +3,7 @@ from collections import OrderedDict
import ray
from ray.tune.suggest.optuna import OptunaSearch
from flaml import CFO, BlendSearch
SEARCH_ALGO_MAPPING = OrderedDict(
@ -55,7 +56,7 @@ class AutoSearchAlgorithm:
Example:
>>> from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace
>>> search_space_hpo=AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
>>> search_space_hpo=AutoHPOSearchSpace.from_model_and_dataset_name("uni", "electra", "small", ["glue"], "rte")
>>> search_algo = AutoSearchAlgorithm.from_method_name("bs", "cus", search_space_hpo,
{"points_to_evaluate": [{"learning_rate": 1e-5, "num_train_epochs": 10}])
"""
@ -64,40 +65,39 @@ class AutoSearchAlgorithm:
if not search_algo_name:
search_algo_name = "grid"
if search_algo_name in SEARCH_ALGO_MAPPING.keys():
try:
"""
filtering the customized args for hpo from custom_hpo_args, keep those
which are in the input variable name list of the constructor of
the algorithm, remove those which do not appear in the input variables
of the constructor function
"""
this_search_algo_kwargs = None
allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if
key in allowed_arguments}
"""
If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for
BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size"
: max(hpo_search_space["per_device_train_batch_size"].categories)},
"""
if search_algo_args_mode == "dft":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
elif search_algo_args_mode == "cus":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"cus", hpo_search_space=hpo_search_space, **allowed_custom_args)
"""
returning the hpo algorithm with the arguments
"""
return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
except KeyError:
if search_algo_name == "grid":
return None
"""
filtering the customized args for hpo from custom_hpo_args, keep those
which are in the input variable name list of the constructor of
the algorithm, remove those which do not appear in the input variables
of the constructor function
"""
this_search_algo_kwargs = None
allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if
key in allowed_arguments}
"""
If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for
BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size"
: max(hpo_search_space["per_device_train_batch_size"].categories)},
"""
if search_algo_args_mode == "dft":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
elif search_algo_args_mode == "cus":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"cus", hpo_search_space=hpo_search_space, **allowed_custom_args)
"""
returning the hpo algorithm with the arguments
"""
return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
raise ValueError(
"Unrecognized method {} for this kind of AutoSearchAlgorithm: {}.\n"
"Method name should be one of {}.".format(
search_algo_name, cls.__name__, ", ".join(c.__name__ for c in SEARCH_ALGO_MAPPING.keys())
search_algo_name, cls.__name__, ", ".join(SEARCH_ALGO_MAPPING.keys())
)
)
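A hedged sketch mirroring the docstring example above; search_space_hpo is assumed to be an HPO space produced by AutoHPOSearchSpace, and points_to_evaluate is an illustrative custom argument routed through custom_hpo_args:

    search_algo = AutoSearchAlgorithm.from_method_name(
        "bs", "cus", search_space_hpo,
        points_to_evaluate=[{"learning_rate": 1e-5, "num_train_epochs": 10}])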

View File

View File

@ -45,8 +45,8 @@ class AutoSeqClassificationHead:
if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys():
return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config)
raise ValueError(
"Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
"Unrecognized configuration class {} for class {}.\n"
"Model type should be one of {}.".format(
config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_CLASSIFICATION_HEAD_MAPPING.keys())
config.__class__, cls.__name__, ", ".join(MODEL_CLASSIFICATION_HEAD_MAPPING.keys())
)
)

View File

@ -1,14 +1,6 @@
import copy
import os
import transformers
from ray import tune
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
transformers.logging.set_verbosity_error()
class TrainerForAutoTransformers(transformers.Trainer):
"""
@ -18,12 +10,6 @@ class TrainerForAutoTransformers(transformers.Trainer):
huggingface (:class:`~transformers.PreTrainedModel` or :obj:`torch.nn.Module`, `optional`):
"""
def get_optimizers(
self, num_training_steps
):
self.current_optimizer, self.current_scheduler = super().get_optimizers(num_training_steps)
return (self.current_optimizer, self.current_scheduler)
def evaluate(self,
eval_dataset=None):
"""
@ -33,7 +19,8 @@ class TrainerForAutoTransformers(transformers.Trainer):
eval_dataset:
the dataset to be evaluated
"""
import wandb
from ray import tune
eval_dataloader = self.get_eval_dataloader(eval_dataset)
output = self.prediction_loop(
eval_dataloader, description="Evaluation")
@ -53,6 +40,10 @@ class TrainerForAutoTransformers(transformers.Trainer):
Overriding transformers.Trainer.save_state. Only by saving
the state can best_trial.get_best_checkpoint return a non-empty value.
"""
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from ray import tune
with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
self.args.output_dir = checkpoint_dir
# This is the directory name that Huggingface requires.
@ -111,11 +102,3 @@ class TrainerForAutoTransformers(transformers.Trainer):
per_device_train_batch_size,
device_count)
return float(warmup_steps / max_steps)
@staticmethod
def resolve_hp_conflict(search_space_dict):
if "max_steps" in search_space_dict and "num_train_epochs" in search_space_dict:
del search_space_dict["num_train_epochs"]
if "warmup_ratio" in search_space_dict and "warmup_steps" in search_space_dict:
del search_space_dict["warmup_ratio"]
return search_space_dict

View File

@ -1,14 +1,60 @@
import re
import pathlib
import os
from azure.storage.blob import BlobServiceClient, ContainerClient
from transformers import AutoConfig
from ..utils import get_wandb_azure_key
from datetime import datetime
from dataclasses import dataclass, field
from ..hpo.grid_searchspace_auto import HF_MODEL_LIST
import json
from typing import Tuple, List, Union, Optional
import argparse
class ConfigScore:
trial_id: str = field(default=None)
start_time: float = field(default=None)
last_update_time: float = field(default=None)
config: dict = field(default=None)
metric_score: dict = field(default=None)
time_stamp: float = field(default=None)
def __init__(self,
trial_id: str = None,
start_time: float = None,
last_update_time: float = None,
config: dict = None,
metric_score: dict = None,
time_stamp: float = None
):
self.trial_id = trial_id
self.start_time = start_time
self.last_update_time = last_update_time
self.config = config
self.metric_score = metric_score
self.time_stamp = time_stamp
class ConfigScoreList:
def __init__(self,
config_score_list: List[ConfigScore],
jobid_config=None,
blob_file=None,
):
self._config_score_list = config_score_list
self._blob_file = blob_file
self._jobid_config = jobid_config
def sorted(self, sort_method="unsorted", metric_mode="max"):
if sort_method == "unsorted":
self._config_score_list = self._config_score_list
elif sort_method == "sort_time":
self._config_score_list = sorted(self._config_score_list, key=lambda x: x.start_time, reverse=False)
else:
self._config_score_list = sorted(self._config_score_list,
key=lambda x: getattr(x, "metric_score")[metric_mode], reverse=True)
def get_best_config(self,
metric_mode="max"):
return max(self._config_score_list, key=lambda x: getattr(x, "metric_score")[metric_mode])
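A hedged sketch of how the new ConfigScore/ConfigScoreList containers might be used; trial ids and scores are made up for illustration:

    scores = ConfigScoreList([
        ConfigScore(trial_id="trial_1", metric_score={"max": 0.82}),
        ConfigScore(trial_id="trial_2", metric_score={"max": 0.90}),
    ])
    best = scores.get_best_config(metric_mode="max")   # ConfigScore with the highest score
    print(best.trial_id)                               # -> "trial_2"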
@dataclass
@ -27,6 +73,8 @@ class JobID:
rep: int = field(default=0)
sddt: int = field(default=None)
sdhf: int = field(default=None)
var1: Optional[float] = field(default=None)
var2: Optional[float] = field(default=None)
def __init__(self,
console_args=None):
@ -41,7 +89,7 @@ class JobID:
self.subdat = "mrpc"
self.mod = "hpo"
self.spa = "uni_test"
self.arg = "dft"
self.arg = "cus"
self.alg = "bs"
self.pru = "None"
self.pre_full = "google/mobilebert-uncased"
@ -51,6 +99,8 @@ class JobID:
self.rep = 0
self.sddt = 43
self.sdhf = 42
self.var1 = None
self.var2 = None
def is_match(self, partial_jobid):
"""
@ -90,7 +140,7 @@ class JobID:
preparing for the job ID for wandb
"""
field_dict = self.__dict__
keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key], key)
keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key])
if type(field_dict[key]) == list
else str(field_dict[key])
for key in field_dict.keys() if not key.endswith("_full")])
@ -102,7 +152,7 @@ class JobID:
"""
list_keys = list(JobID.__dataclass_fields__.keys())
field_dict = self.__dict__
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key])
if type(field_dict[key]) == list
else key + "=" + str(field_dict[key])
for key in list_keys if not key.endswith("_full")])
@ -114,7 +164,7 @@ class JobID:
"""
list_keys = list(JobID.__dataclass_fields__.keys())
field_dict = self.__dict__ # field_dict contains fields whose values are not None
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key])
if type(field_dict[key]) == list
else key + "=" + str(field_dict[key])
for key in list_keys if key in field_dict.keys()])
@ -131,9 +181,20 @@ class JobID:
pre = 'funnel', presz = 'xlarge', spt = 'rspt',
rep = 0, sddt = 43, sdhf = 42)
"""
field_keys = [key for key in
list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")]
regex_expression = ".*" + "_".join([key + "=(?P<" + key + ">.*)" for key in field_keys]) + ".(json|zip)"
field_keys = [key for key in list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")]
regex_expression = ".*"
is_first = True
for key in field_keys:
if is_first:
prefix = ""
is_first = False
else:
prefix = "_"
if key.startswith("sd") or key.startswith("var"):
regex_expression += "(" + prefix + key + "=(?P<" + key + ">[^_]*))?"
else:
regex_expression += prefix + key + "=(?P<" + key + ">[^_]*)"
regex_expression += ".(json|zip)"
result = re.search(regex_expression, keytoval_str)
if result:
result_dict = {}
@ -142,8 +203,13 @@ class JobID:
result_dict[key] = [result.group(key)]
elif key == "rep":
try:
result_dict[key] = int(result.group(key))
except IndexError:
try:
result_dict[key] = int(result.group(key))
except IndexError:
print("No group {} in the regex result".format(key))
result_dict[key] = -1
except ValueError:
print("Cannot parse integer {}".format(result.group(key)))
result_dict[key] = -1
else:
result_dict[key] = result.group(key)
@ -152,24 +218,23 @@ class JobID:
return None
@staticmethod
def dataset_list_to_str(dataset_name, key):
if key == "dat":
assert isinstance(dataset_name, list)
def dataset_list_to_str(dataset_name, key="dat"):
if isinstance(dataset_name, list):
return "-".join(dataset_name)
else:
return dataset_name
@staticmethod
def set_jobid_from_arg_list(self,
**jobid_list
):
"""
set the jobid from a dict object
"""
for key in jobid_list.keys():
assert key in JobID.__dataclass_fields__.keys()
setattr(self, key, jobid_list[key])
if self.mod == "grid":
self.alg = "grid"
@staticmethod
def convert_blobname_to_jobid(blobname):
@ -185,11 +250,14 @@ class JobID:
return None
@staticmethod
def get_full_data_name(dataset_name, subdataset_name=None):
def get_full_data_name(dataset_name: Union[list, str], subdataset_name=None):
"""
convert a dataset name and sub dataset name to a full dataset name
"""
full_dataset_name = dataset_name
if isinstance(dataset_name, list):
full_dataset_name = JobID.dataset_list_to_str(dataset_name)
else:
full_dataset_name = dataset_name
if subdataset_name:
full_dataset_name = full_dataset_name + "_" + subdataset_name
return full_dataset_name
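Two small examples of the helpers above after the list-based refactoring (derived directly from the code, shown here for clarity):

    JobID.dataset_list_to_str(["glue"])          # -> "glue"
    JobID.get_full_data_name(["glue"], "mrpc")   # -> "glue_mrpc"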
@ -198,10 +266,11 @@ class JobID:
"""
get the full dataset name of the current JobID object
"""
return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat, "dat"), self.subdat)
return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat), self.subdat)
@staticmethod
def _extract_model_type_with_keywords_match(pre_full):
from ..hpo.grid_searchspace_auto import HF_MODEL_LIST
matched_model_type = []
for each_model_type in HF_MODEL_LIST:
if each_model_type in pre_full:
@ -211,109 +280,64 @@ class JobID:
@staticmethod
def extract_model_type(full_model_name):
from transformers import AutoConfig
model_config = AutoConfig.from_pretrained(full_model_name)
config_json_file = model_config.get_config_dict(full_model_name)[0]
try:
model_type = config_json_file["model_type"]
except KeyError:
model_type = JobID._extract_model_type_with_keywords_match()
print("config_json_file does not contain model_type, re-extracting with keywords matching")
model_type = JobID._extract_model_type_with_keywords_match(full_model_name)
return model_type
def set_jobid_from_console_args(self, console_args):
self.dat = console_args.dataset_subdataset_name.split(":")[0].split(",")
self.subdat = console_args.dataset_subdataset_name.split(":")[1]
self.mod = console_args.algo_mode
self.spa = console_args.space_mode
self.arg = console_args.search_alg_args_mode
self.alg = console_args.algo_name
self.pru = console_args.pruner
self.pre_full = console_args.pretrained_model_size.split(":")[0]
self.pre = JobID.extract_model_type(self.pre_full)
self.presz = console_args.pretrained_model_size.split(":")[1]
self.spt = console_args.resplit_mode
self.rep = console_args.rep_id
self.sddt = console_args.seed_data
self.sdhf = console_args.seed_transformers
@staticmethod
def legacy_old_blobname_to_new_blobname(self,
old_blobname):
spa_id2val = {
0: "gnr",
1: "uni"
def get_attrval_from_arg_or_dict(console_args: Union[argparse.ArgumentParser, dict], each_key):
if type(console_args) == argparse.Namespace:
return getattr(console_args, each_key)
else:
return console_args[each_key]
def set_jobid_from_console_args(self, console_args: Union[argparse.ArgumentParser, dict]):
from ..utils import pretrained_model_size_format_check, dataset_subdataset_name_format_check
console_to_jobid_key_mapping = {
"pretrained_model_size": "pre",
"dataset_subdataset_name": "dat",
"algo_mode": "mod",
"space_mode": "spa",
"search_alg_args_mode": "arg",
"algo_name": "alg",
"pruner": "pru",
"resplit_mode": "spt",
"rep_id": "rep",
"seed_data": "sddt",
"seed_transformers": "sdhf",
"optarg1": "var1",
"optarg2": "var2"
}
alg_id2val = {
0: "bs",
1: "optuna",
2: "cfo"
}
pre_id2val = {
0: "xlnet-base-cased",
1: "albert-large-v1",
2: "distilbert-base-uncased",
3: "microsoft/deberta-base",
4: "funnel-transformer/small-base",
5: "microsoft/deberta-large",
6: "funnel-transformer/large-base",
7: "funnel-transformer/intermediate-base",
8: "funnel-transformer/xlarge-base"
}
presz_id2val = {
0: "base",
1: "small",
2: "base",
3: "base",
4: "base",
5: "large",
6: "large",
7: "intermediate",
8: "xlarge"
}
spt_id2val = {
0: "rspt",
1: "ori"
}
result_grid = re.search(r".*_mod(el)?(?P<model_id>\d+)_None_None(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
old_blobname)
result = re.search(
r".*_mod(el)?(?P<model_id>\d+)_(alg)?(?P<algo_id>\d+)_(spa)?"
r"(?P<space_id>\d+)(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
old_blobname)
if result_grid:
dat = [old_blobname.split("/")[1].split("_")[0]]
subdat = old_blobname.split("/")[1].split("_")[1]
mod = "hpo"
spa = None
arg = None
alg = None
pru = None
pre = pre_id2val[int(result_grid.group("model_id"))]
presz = presz_id2val[int(result_grid.group("model_id"))]
for each_key in console_to_jobid_key_mapping.keys():
try:
spt = spt_id2val[int(result_grid.group("split_id"))]
try:
if each_key == "dataset_subdataset_name":
dataset_subdataset_name_format_check(getattr(console_args, each_key))
self.dat = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[0].split(",")
self.subdat = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[1]
elif each_key == "pretrained_model_size":
pretrained_model_size_format_check(getattr(console_args, each_key))
self.pre_full = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[0]
self.pre = JobID.extract_model_type(self.pre_full)
self.presz = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[1]
else:
jobid_key = console_to_jobid_key_mapping[each_key]
attrval = JobID.get_attrval_from_arg_or_dict(console_args, each_key)
setattr(self, jobid_key, attrval)
except AttributeError:
print("console_args has no attribute {}, continue".format(each_key))
continue
except KeyError:
spt = spt_id2val[0]
rep = None
self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep)
return self.to_jobid_string()
if result:
dat = [old_blobname.split("/")[1].split("_")[0]]
subdat = old_blobname.split("/")[1].split("_")[1]
mod = "hpo"
spa = spa_id2val[int(result.group("space_id"))]
arg = "dft"
alg = alg_id2val[int(result.group("algo_id"))]
pru = "None"
pre = pre_id2val[int(result_grid.group("model_id"))]
presz = presz_id2val[int(result_grid.group("model_id"))]
try:
spt = spt_id2val[int(result_grid.group("split_id"))]
except KeyError:
spt = spt_id2val[0]
rep = int(result.group("rep_id"))
self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep)
return self.to_jobid_string()
return None
print("console_args has no attribute {}, continue".format(each_key))
continue
if self.mod == "grid":
self.alg = "grid"
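For illustration, a hedged sketch of how set_jobid_from_console_args is meant to be used: the long console argument names are mapped onto the short JobID attribute names via console_to_jobid_key_mapping, with special handling for the two colon-separated arguments. The concrete argument values below are hypothetical; load_dft_args and the attribute names mirror the test suite later in this commit.

from flaml.nlp.utils import load_dft_args
from flaml.nlp.result_analysis.azure_utils import JobID

args = load_dft_args()
args.dataset_subdataset_name = "glue:mrpc"            # -> dat = ["glue"], subdat = "mrpc"
args.pretrained_model_size = "google/electra-base-discriminator:base"   # -> pre_full, pre, presz
args.algo_mode, args.space_mode, args.algo_name = "hpo", "uni", "bs"

jobid_config = JobID()
jobid_config.set_jobid_from_console_args(args)
print(jobid_config.to_jobid_string())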
class AzureUtils:
@ -321,62 +345,89 @@ class AzureUtils:
def __init__(self,
root_log_path=None,
console_args=None,
jobid=None,
autohf=None):
from ..utils import get_wandb_azure_key
if root_log_path:
self.root_log_path = root_log_path
else:
self.root_log_path = "logs_azure"
self.jobid = jobid
self.jobid = autohf.jobid_config
self.console_args = console_args
self.autohf = autohf
if console_args:
wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
self._container_name = container_name
self._azure_key = azure_key
else:
self._container_name = self._azure_key = ""
def _get_complete_connection_string(self):
return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \
+ self._azure_key + ";EndpointSuffix=core.windows.net"
try:
return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \
+ self._azure_key + ";EndpointSuffix=core.windows.net"
except AttributeError:
return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \
";EndpointSuffix=core.windows.net"
def _init_azure_clients(self):
connection_string = self._get_complete_connection_string()
container_client = ContainerClient.from_connection_string(conn_str=connection_string,
container_name=self._container_name)
return container_client
try:
from azure.storage.blob import ContainerClient
connection_string = self._get_complete_connection_string()
try:
container_client = ContainerClient.from_connection_string(conn_str=connection_string,
container_name=self._container_name)
return container_client
except ValueError:
print("AzureUtils._container_name is specified as: {}, "
"please correctly specify AzureUtils._container_name".format(self._container_name))
return None
except ImportError:
print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob")
def _init_blob_client(self,
local_file_path):
connection_string = self._get_complete_connection_string()
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path)
return blob_client
try:
from azure.storage.blob import BlobServiceClient
connection_string = self._get_complete_connection_string()
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
try:
blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path)
return blob_client
except ValueError:
print("_container_name is unspecified or wrongly specified, please specify _container_name in AzureUtils")
return None
except ImportError:
print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob")
def upload_local_file_to_azure(self, local_file_path):
blob_client = self._init_blob_client(local_file_path)
with open(local_file_path, "rb") as fin:
blob_client.upload_blob(fin, overwrite=True)
try:
from azure.core.exceptions import HttpResponseError
try:
blob_client = self._init_blob_client(local_file_path)
if blob_client:
with open(local_file_path, "rb") as fin:
blob_client.upload_blob(fin, overwrite=True)
except HttpResponseError as err:
print("Cannot upload blob due to {}: {}".format("azure.core.exceptions.HttpResponseError",
err))
except ImportError:
print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob")
def download_azure_blob(self, blobname):
blob_client = self._init_blob_client(blobname)
pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
parents=True, exist_ok=True)
with open(blobname, "wb") as fout:
fout.write(blob_client.download_blob().readall())
if blob_client:
pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
parents=True, exist_ok=True)
with open(blobname, "wb") as fout:
fout.write(blob_client.download_blob().readall())
def write_exception(self):
result_json = {
"timestamp": datetime.now(),
}
local_file_path = self.generate_local_json_path()
self.create_local_json_and_upload(result_json, local_file_path)
def extract_log_from_analysis(self,
analysis):
def extract_configscore_list_from_analysis(self,
analysis):
"""
Extract the key information returned by tune.run into a list of ConfigScore objects
"""
json_log = []
configscore_list = []
for each_trial in analysis.trials:
trial_id = each_trial.trial_id
start_time = each_trial.start_time
@ -385,18 +436,23 @@ class AzureUtils:
try:
metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
time_stamp = each_trial.metric_analysis['timestamp']
json_log.append({"trial_id": trial_id,
"start_time": start_time,
"last_update_time": last_update_time,
"config": config,
"metric_score": metric_score,
"time_stamp": time_stamp})
except KeyError:
pass
return json_log
print("KeyError, {} does not contain the key {} or {}".format("each_trial.metric_analysis",
"eval_" + analysis.default_metric,
"timestamp"))
metric_score = 0
time_stamp = 0
configscore_list.append(ConfigScore(
trial_id=trial_id,
start_time=start_time,
last_update_time=last_update_time,
config=config,
metric_score=metric_score,
time_stamp=time_stamp))
return configscore_list
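The extraction above assumes a small ConfigScore record per trial and a ConfigScoreList wrapper that can be sorted and queried for the best config (see the test suite later in this commit). A minimal illustrative sketch of such dataclasses follows; the field names are taken from the calls in this diff, everything else is an assumption, and the real definitions in flaml/nlp/result_analysis/azure_utils.py may differ.

from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class ConfigScore:
    # field names mirror the keyword arguments used in extract_configscore_list_from_analysis
    trial_id: str = None
    start_time: float = None
    last_update_time: float = None
    config: Dict[str, Any] = None
    metric_score: Dict[str, float] = None
    time_stamp: float = None


@dataclass
class ConfigScoreList:
    config_score_list: List[ConfigScore] = None
    jobid_config: Any = None   # JobID of the run this list was extracted from
    blob_file: Any = None      # the azure blob the list was loaded from

    def sorted(self, sort_method: str = "unsorted", metric_mode: str = "max"):
        # "sort_time" orders trials chronologically, "sort_accuracy" by score; otherwise keep the order
        if sort_method == "sort_time":
            self.config_score_list = sorted(self.config_score_list, key=lambda x: x.start_time)
        elif sort_method == "sort_accuracy":
            self.config_score_list = sorted(
                self.config_score_list, key=lambda x: x.metric_score[metric_mode], reverse=True)

    def get_best_config(self, metric_mode: str = "max") -> Optional[ConfigScore]:
        return max(self.config_score_list, key=lambda x: x.metric_score[metric_mode], default=None)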
def write_autohf_output(self,
json_log=None,
configscore_list=None,
valid_metric=None,
predictions=None,
duration=None):
@ -405,8 +461,8 @@ class AzureUtils:
"""
local_file_path = self.generate_local_json_path()
output_json = {}
if json_log:
output_json["val_log"] = json_log
if configscore_list:
output_json["val_log"] = [configscore.__dict__ for configscore in configscore_list]
if valid_metric:
output_json["valid_metric"] = valid_metric
if duration:
@ -432,47 +488,6 @@ class AzureUtils:
fout.flush()
self.upload_local_file_to_azure(local_file_path)
def legacy_to_json(self):
container_client = self._init_azure_clients()
for old_blob in container_client.list_blobs():
new_jobid_str = self.jobid.legacy_old_blobname_to_new_blobname(old_blob.name)
if new_jobid_str:
self.download_azure_blob(old_blob.name)
with open(old_blob.name, "r") as fin:
alllines = fin.readlines()
wandb_group_name = alllines[0].rstrip("\n:")
timestamp = re.search(
r"timestamp:(?P<timestamp>.*):",
alllines[1].strip("\n")).group("timestamp")
duration = re.search(
r"duration:(?P<duration>.*)$",
alllines[3].strip("\n")).group("duration")
sample_num = int(re.search(
r"sample_num: (?P<sample_num>\d+)$",
alllines[4].strip("\n")).group("sample_num"))
validation = {"accuracy": float(re.search(
"validation accuracy: (?P<validation>.*)$",
alllines[2].strip("\n")).group("validation"))}
test = None
if len(alllines) > 6:
result_test = re.search("test accuracy:(?P<test>.*)$", alllines[6].strip("\n"))
if result_test:
test = json.loads(result_test.group("test"))
yml_file = None
if len(alllines) > 8:
if alllines[8].startswith("aml"):
yml_file = alllines[8].strip("\n")
new_json = {"wandb_group_name": wandb_group_name,
"validation": validation,
"test": test,
"timestamp": timestamp,
"duration": duration,
"sample_num": sample_num,
"yml_file": yml_file}
full_dataset_name = self.jobid.get_jobid_full_data_name()
new_blobname = os.path.join("logs_azure/", full_dataset_name, new_jobid_str + ".json")
self.create_local_json_and_upload(new_json, new_blobname)
def create_local_prediction_and_upload(self,
local_json_file,
predictions):
@ -480,198 +495,104 @@ class AzureUtils:
store predictions (a .zip file) locally and upload
"""
azure_save_file_name = local_json_file.split("/")[-1][:-5]
try:
output_dir = self.console_args.data_root_dir
except AttributeError:
print("console_args does not contain data_root_dir, loading the default value")
from ..utils import load_dft_args
console_args = load_dft_args()
output_dir = getattr(console_args, "data_root_dir")
local_archive_path = self.autohf.output_prediction(predictions,
output_prediction_path=self.console_args.data_root_dir + "result/",
output_prediction_path=output_dir + "result/",
output_zip_file_name=azure_save_file_name)
self.upload_local_file_to_azure(local_archive_path)
def get_ranked_configs(self, metric_mode):
"""
extract the configs (ranked in descending order by score) from the azure file of the current object
(defined by self.jobid_config)
"""
azure_file_path = self.generate_local_json_path()
self.download_azure_blob(azure_file_path)
json_log = json.load(open(azure_file_path, "r"))
assert "val_log" in json_log
trialid_to_score = {}
trialid_to_config = {}
for each_entry in json_log["val_log"]:
trial_id = each_entry["trial_id"]
config = each_entry["config"]
this_score = each_entry["metric_score"][metric_mode]
trialid_to_config[trial_id] = config
trialid_to_score[trial_id] = this_score
sorted_trialid_to_score = sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True)
return [trialid_to_config[entry[0]] for entry in sorted_trialid_to_score]
@staticmethod
def is_after_earliest_time(this_blob, earliest_time):
def is_after_earliest_time(this_blob, earliest_time: Tuple[int, int, int]):
import pytz
utc = pytz.UTC
if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):
return True
return False
def get_blob_list_matching_partial_jobid(self, root_log_path, partial_jobid, earliest_time=None):
def get_configblob_from_partial_jobid(self,
root_log_path,
partial_jobid,
earliest_time: Tuple[int, int, int] = None):
"""
get all blobs whose jobid configs match the partial_jobid
"""
blob_list = []
container_client = self._init_azure_clients()
jobid_config = JobID()
for each_blob in container_client.list_blobs():
if each_blob.name.startswith(root_log_path):
each_jobconfig = jobid_config.convert_blobname_to_jobid(each_blob.name)
is_append = False
if each_jobconfig:
if each_jobconfig.is_match(partial_jobid):
is_append = True
if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time):
is_append = False
if is_append:
blob_list.append((each_jobconfig, each_blob))
if container_client:
for each_blob in container_client.list_blobs():
if each_blob.name.startswith(root_log_path):
each_jobconfig = JobID.convert_blobname_to_jobid(each_blob.name)
is_append = False
if each_jobconfig:
if each_jobconfig.is_match(partial_jobid):
is_append = True
if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time):
is_append = False
if is_append:
blob_list.append((each_jobconfig, each_blob))
return blob_list
@staticmethod
def extract_config_and_score(blobname):
data_json = json.load(open(blobname, "r"))
return [(x['config'], x['metric_score']["max"], x['start_time']) for x in data_json['val_log']]
def get_config_and_score_from_partial_jobid(self,
root_log_path,
partial_jobid,
group_attrs,
method,
earliest_time=None):
root_log_path: str,
partial_jobid: JobID,
earliest_time: Tuple[int, int, int] = None):
"""
get the best config and best score for each job matching the partial_jobid
"""
matched_blob_list = self.get_blob_list_matching_partial_jobid(
Extract the config and score lists for all blobs matching a partial jobid
Args:
root_log_path:
The root log path in azure blob storage, e.g., "logs_seed/"
partial_jobid:
The partial jobid for matching the blob list
earliest_time (optional):
The earliest start time for any matched blob, used to filter out outdated jobs,
format: (YYYY, MM, DD)
Return:
A list of ConfigScoreList objects, one per matched blob, each storing that blob's configs and scores
"""
assert isinstance(root_log_path, str), "root_log_path must be of type str"
assert isinstance(partial_jobid, JobID), "partial_jobid must be of type JobID"
if earliest_time:
assert isinstance(earliest_time, tuple), "earliest_time must be a tuple of (YYYY, MM, DD)"
matched_blob_list = self.get_configblob_from_partial_jobid(
root_log_path,
partial_jobid,
earliest_time=earliest_time)
group_dict = {}
for (each_jobconfig, each_blob) in matched_blob_list:
self.download_azure_blob(each_blob.name)
config_and_score = AzureUtils.extract_config_and_score(each_blob.name)
if method == "unsorted":
sorted_config_and_score = config_and_score
elif method == "sort_time":
sorted_config_and_score = sorted(config_and_score, key=lambda x: x[2], reverse=False)
else:
sorted_config_and_score = sorted(config_and_score, key=lambda x: x[1], reverse=True)
group_attr_list = []
for each_attr in group_attrs:
group_val = getattr(each_jobconfig, each_attr)
if isinstance(group_val, list):
group_attr_list.append(JobID.dataset_list_to_str(group_val, each_attr))
else:
group_attr_list.append(group_val)
group_attr_tuple = tuple(group_attr_list)
group_dict.setdefault(group_attr_tuple, [])
group_dict[group_attr_tuple].append([(config, score, each_blob.name)
for (config, score, ts) in sorted_config_and_score])
return group_dict
return self.get_config_and_score_from_matched_blob_list(matched_blob_list,
earliest_time)
def get_validation_perf(self, console_args=None, partial_jobid_config=None):
def get_config_and_score_from_matched_blob_list(self,
matched_blob_list,
earliest_time: Tuple[int, int, int] = None):
"""
get the validation score for all blobs matching the partial_jobid_config
"""
if partial_jobid_config.pre == "electra":
dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2", "qnli", "mnli"]
else:
dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2"]
dataset_vallist1 = [0] * len(dataset_namelist)
dataset_vallist2 = [0] * len(dataset_namelist)
Extract the config and score lists from one or multiple matched blobs
matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path,
partial_jobid_config)
Args:
matched_blob_list:
The list of (jobid_config, blob) pairs returned by get_configblob_from_partial_jobid
Return:
A list of ConfigScoreList objects, one per matched blob, each storing that blob's configs and scores
"""
matched_config_score_lists = []
for (each_jobconfig, each_blob) in matched_blob_list:
subdat_name = each_jobconfig.subdat
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
print(len(data_json["val_log"]))
validation_metric = data_json['valid_metric']
try:
dataset_idx = dataset_namelist.index(subdat_name)
dataset_vallist1[dataset_idx], dataset_vallist2[dataset_idx] \
= self.get_validation_metricstr(validation_metric)
except ValueError:
pass
# print(" & ".join(dataset_vallist1))
# print(", ,".join(dataset_vallist2))
def get_validation_metricstr(self, validation_metric):
"""
get a string representation of the validation scores for pasting into a Google spreadsheet
"""
validation_str1 = validation_str2 = ""
is_first = True
for key in ["f1", "accuracy", "pearson", "spearmanr", "matthews_correlation"]:
if "eval_" + key in validation_metric.keys():
if is_first:
validation_str1 += str("%.1f" % (validation_metric["eval_" + key] * 100))
validation_str2 += str(validation_metric["eval_" + key] * 100)
is_first = False
else:
validation_str1 += "/" + str("%.1f" % (validation_metric["eval_" + key] * 100))
validation_str2 += "," + str(validation_metric["eval_" + key] * 100)
return validation_str1, validation_str2
def get_test_perf(self, partial_jobid_config=None, result_root_dir=None):
"""
get the test scores for all blobs matching the partial_jobid_config
"""
import shutil
from flaml.nlp.dataset.submission_auto import file_name_mapping_glue, output_blank_tsv
matched_blob_list = self.get_blob_list_matching_partial_jobid("data/", partial_jobid_config)
partial_jobid_str = partial_jobid_config.to_partial_jobid_string()
output_dir = os.path.join(result_root_dir, partial_jobid_str)
if os.path.exists(output_dir):
assert os.path.isdir(output_dir)
else:
os.mkdir(output_dir)
output_blank_tsv(output_dir)
for (each_jobconfig, each_blob) in matched_blob_list:
subdat_name = each_jobconfig.subdat
self.download_azure_blob(each_blob.name)
import zipfile
if os.path.exists(each_blob.name[:-4]):
assert os.path.isdir(each_blob.name[:-4])
else:
os.mkdir(each_blob.name[:-4])
with zipfile.ZipFile(each_blob.name, 'r') as zip_ref:
zip_ref.extractall(each_blob.name[:-4])
src = os.path.join(each_blob.name[:-4], file_name_mapping_glue[subdat_name][0])
dst = os.path.join(output_dir, file_name_mapping_glue[subdat_name][0])
shutil.copy(src, dst)
shutil.make_archive(os.path.join(output_dir), 'zip', output_dir)
def get_best_perf_config(self, console_args, jobid_config):
"""
get the config of the best-performing trial
"""
matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, jobid_config)
try:
assert len(matched_blob_list) == 1
except AssertionError:
import pdb
pdb.set_trace()
each_jobconfig, each_blob = matched_blob_list[0]
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
sorted_entries = sorted(data_json['val_log'], key=lambda x: x['metric_score']['max'], reverse=True)
best_config = sorted_entries[0]['config']
if jobid_config.subdat != "mrpc":
best_score = sorted_entries[0]['metric_score']['max']
else:
best_score = (data_json["valid_metric"]["eval_f1"], data_json["valid_metric"]["eval_accuracy"])
return best_config, best_score
each_config_and_score_list = ConfigScoreList(
jobid_config=each_jobconfig,
blob_file=each_blob,
config_score_list=[ConfigScore(**each_dict) for each_dict in data_json['val_log']])
matched_config_score_lists.append(each_config_and_score_list)
return matched_config_score_lists
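To show how these query helpers compose, here is a hedged sketch: build a partial JobID, match every blob under a root log path, and inspect the best trial of each matched run. The calls mirror ones that appear elsewhere in this commit (the test suite and the JobID fields above); the concrete attribute values, the container setup in key.json, and the availability of matching blobs are all assumptions.

from flaml.nlp import AutoTransformers
from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID
from flaml.nlp.utils import load_dft_args

# a partial jobid: only the fields we care about are set, the unset fields match anything
partial_jobid = JobID()
partial_jobid.pre = "deberta"
partial_jobid.mod = "grid"
partial_jobid.presz = "large"

args = load_dft_args()
args.key_path = "."                                    # directory containing key.json (azure/wandb keys)
autohf = AutoTransformers()
autohf.jobid_config = partial_jobid                    # AzureUtils reads its jobid from autohf.jobid_config
azure_utils = AzureUtils(console_args=args, autohf=autohf)

matched_lists = azure_utils.get_config_and_score_from_partial_jobid(
    root_log_path="logs_azure/",
    partial_jobid=partial_jobid,
    earliest_time=(2021, 5, 1))                        # optional: drop blobs last modified before this date
for config_score_list in matched_lists:
    best = config_score_list.get_best_config()
    print(config_score_list.jobid_config.to_jobid_string(), best.config if best else None)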

View File

@ -1,357 +0,0 @@
def extract_ranked_config_score(console_args, partial_config_dict):
from .azure_utils import AzureUtils
azure_utils = AzureUtils(console_args=console_args)
for method, each_partial_config in partial_config_dict.items():
dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(each_partial_config,
["dat", "subdat"], method)
for each_dataset, configscorelist in dataset2configscorelist.items():
for config_idx in range(len(configscorelist)):
avg_scores = configscorelist[config_idx][0][1]
top_config = configscorelist[config_idx][0][0]
print(avg_scores)
print(top_config)
# print(method + "," + str(each_dataset) + ",rep=" + str(config_idx))
# print("avg score :" + str(avg_scores))
# print(''.join(['{0}={1}\n'.format(key, top_config[key]) for key in sorted(top_config.keys())]))
def extract_sorted_config_list(dataset2configscorelist, topk):
dataset2topkconfigs = {}
for dataset, configscorelist in dataset2configscorelist.items():
all_configscorelist = []
for scorelist in configscorelist:
for item in scorelist:
if item[0] not in [x[0] for x in all_configscorelist]:
all_configscorelist.append(item)
sorted_all_configscorelist = sorted(all_configscorelist, key=lambda x: x[1], reverse=True)
topk_configs = []
for each_hp in ("learning_rate", "num_train_epochs", "per_device_train_batch_size", "warmup_ratio",
"weight_decay", "adam_epsilon"):
topk_configs.append((each_hp, [sorted_all_configscorelist[x][0][each_hp] for x in range(topk)]))
topk_configs.append(("perf", [sorted_all_configscorelist[x][1] for x in range(topk)]))
dataset2topkconfigs[dataset] = topk_configs
return dataset2topkconfigs
def dict2tuple(this_dict):
tuple_list = []
for key in sorted(this_dict.keys()):
tuple_list.append(this_dict[key])
return tuple(tuple_list)
def merge_configscore_list(small_dataset2configscorelist):
dataset2merged_configscorelist = {}
for (dataset, each_configscore_list) in small_dataset2configscorelist.items():
merged_configscore_list = {}
for rep_id in range(len(each_configscore_list)):
for each_configscore_entry in each_configscore_list[rep_id]:
is_exist = False
for configscore in merged_configscore_list.keys():
if configscore[0] == each_configscore_entry[0]:
is_exist = True
break
if is_exist is False:
merged_configscore_list[dict2tuple(each_configscore_entry[0])] = each_configscore_entry[1]
dataset2merged_configscorelist[dataset] = merged_configscore_list
return dataset2merged_configscorelist
def get_result(console_args, partial_jobid_config):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
partial_jobid_config,
["dat", "subdat"],
"hpo")
for dataset, configscore_list in dataset2configscorelist.items():
for rep_id in range(len(configscore_list)):
config_dict = configscore_list[rep_id][0][0]
score = configscore_list[rep_id][0][1]
print(dataset, rep_id)
print_config(config_dict)
print(score)
print()
def print_config(config_dict):
for key in sorted(config_dict.keys()):
if key in ("attention_probs_dropout_prob", "hidden_dropout_prob", "seed"):
continue
if key == "per_device_train_batch_size":
short_key = "batch_size"
elif key == "num_train_epochs":
short_key = "epochs"
else:
short_key = key
print(short_key, config_dict[key])
def compare_small_vs_large(console_args):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
partial_jobid_config = JobID()
partial_jobid_config.pre = "deberta"
partial_jobid_config.mod = "hpo"
partial_jobid_config.spa = "uni"
partial_jobid_config.presz = "base"
small_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
["dat", "subdat"], "list")
small_mergedconfiglist = merge_configscore_list(small_dataset2configscorelist)
partial_jobid_config = JobID()
partial_jobid_config.pre = "deberta"
partial_jobid_config.mod = "hpo"
partial_jobid_config.spa = "uni"
partial_jobid_config.presz = "large"
large_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
["dat", "subdat"], "hpo")
large_mergedconfiglist = merge_configscore_list(large_dataset2configscorelist)
for (each_dataset, merged_small_configlist) in small_mergedconfiglist.items():
merged_large_configlist = large_mergedconfiglist[each_dataset]
print(each_dataset)
print()
for (each_tuple, large_score) in sorted(merged_large_configlist.items(), key=lambda x: x[1], reverse=True):
# small_score = merged_small_configlist[each_tuple]
is_in_onlysmall = each_tuple in small_mergedconfiglist[each_dataset]
for each_val in each_tuple:
print(each_val, end=", ")
print(large_score, is_in_onlysmall, sep=",")
print()
for (each_tuple, small_score) in \
sorted(small_mergedconfiglist[each_dataset].items(), key=lambda x: x[1], reverse=True):
is_in_large = each_tuple in large_mergedconfiglist[each_dataset]
for each_val in each_tuple:
print(each_val, end=", ")
print(small_score, is_in_large, sep=",")
def check_conflict(console_args, partial_jobid_config_list):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
for each_partial_config in partial_jobid_config_list:
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
each_partial_config,
["dat", "subdat"],
"unsorted")
for (dataset, configscorelists) in dataset2configscorelist.items():
config2score = {}
for each_configscorelist in configscorelists:
for (config, score, blobname) in each_configscorelist:
config_dict = dict2tuple(config)
try:
config2score[config_dict].append((score, blobname))
except KeyError:
config2score.setdefault(config_dict, [])
config2score[config_dict].append((score, blobname))
dup_keys = [config for config in config2score.keys() if len(config2score[config]) > 1]
dupkey_count = [len(set([y[0] for y in config2score[x]])) for x in dup_keys]
print(dataset)
print(len(config2score))
print(len(dupkey_count))
print(dupkey_count)
def print_cfo(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.mod = "bestnn"
jobid_config.spa = "buni"
jobid_config.alg = "bs"
jobid_config.pre = "funnel"
jobid_config.presz = "xlarge"
for each_rep in range(3):
jobid_config.rep = each_rep
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
jobid_config,
["dat", "subdat"],
"sort_time")
dataset = ('glue', 'mrpc')
configscorelist = dataset2configscorelist[dataset]
count = 0
print(dataset)
for (config, score, blobname) in sorted(configscorelist[0], key=lambda x: x[1], reverse=True)[0:1]:
print(count)
print(score)
print_config(config)
print()
count += 1
def download_validation(console_args, result_root_dir):
from .azure_utils import JobID, AzureUtils
partial_jobid_config = JobID()
partial_jobid_config.mod = "grid"
partial_jobid_config.pre = "roberta"
partial_jobid_config.presz = "base"
# partial_jobid_config.alg = "optuna"
# partial_jobid_config.pru = "asha"
partial_jobid_config.rep = 0
azure_utils = AzureUtils(console_args=console_args, jobid=partial_jobid_config)
azure_utils.get_validation_perf(console_args=console_args, partial_jobid_config=partial_jobid_config)
azure_utils.get_test_perf(partial_jobid_config, result_root_dir)
def get_result_str(jobid_config, val_score, test_score, best_config, subdat2config=None, mode="grid"):
result_str = jobid_config.subdat.upper() + ","
if jobid_config.alg:
result_str += jobid_config.alg.upper().replace("OPTUNA", "Optuna")
if jobid_config.pru is not None and jobid_config.pru != "None":
result_str += "+" + jobid_config.pru.upper()
if jobid_config.subdat != "mrpc":
result_str += ",rep " + str(jobid_config.rep) + " & " + str(
"%.1f" % (val_score * 100)) + " & " + str(test_score)
else:
result_str += ",rep " + str(jobid_config.rep) + " & " + str(
"%.1f" % (val_score[0] * 100)) + "/" + str(
"%.1f" % (val_score[1] * 100)) + " & " + str(test_score)
for hp in ["learning_rate", "warmup_ratio", "per_device_train_batch_size", "hidden_dropout", "attention_dropout",
"weight_decay"]:
if hp not in best_config:
result_str += " & "
else:
if mode == "hpo":
if best_config[hp] > 1.2 * subdat2config[jobid_config.subdat][hp]:
wrap_left = "\\cellcolor{green!85}{"
elif best_config[hp] > subdat2config[jobid_config.subdat][hp]:
wrap_left = "\\cellcolor{green!15}{"
elif best_config[hp] < subdat2config[jobid_config.subdat][hp] / 1.2:
wrap_left = "\\cellcolor{red!85}{"
else:
wrap_left = "\\cellcolor{red!15}{"
wrap_right = "}"
else:
wrap_left = wrap_right = ""
if hp == "per_device_train_batch_size" or hp == "learning_rate":
wrap_left = wrap_right = ""
if hp == "learning_rate":
result_str += " & " + wrap_left + "{:.1e}".format(best_config[hp]) + wrap_right
elif hp == "per_device_train_batch_size":
result_str += " & " + wrap_left + str(best_config[hp]) + wrap_right
else:
result_str += " & " + wrap_left + str("%.3f" % best_config[hp]) + wrap_right
return result_str + "\\\\"
def extract_grid(console_args, jobid_config, overfitting_subdat, test_scores):
from .azure_utils import JobID, AzureUtils
key2printstr = {}
subdat2config = {}
for idx in range(len(overfitting_subdat)):
jobid_config.subdat = overfitting_subdat[idx]
jobid_config.mod = "grid"
jobid_config.rep = 0
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
best_config["hidden_dropout"] = 0.1
best_config["attention_dropout"] = 0.1
test_score = test_scores[idx]
key2printstr[jobid_config.subdat.upper() + ", grid"] = get_result_str(jobid_config, val_score,
test_score, best_config)
subdat2config[jobid_config.subdat] = best_config
print()
for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
print(printstr)
return subdat2config
def extract_hpo(
console_args,
jobid_config,
overfitting_subdat,
overfitting_alg,
overfitting_pru,
overfitting_rep,
subdat2config,
test_scores):
from .azure_utils import AzureUtils
key2printstr = {}
for idx in range(len(overfitting_subdat)):
jobid_config.subdat = overfitting_subdat[idx]
jobid_config.alg = overfitting_alg[idx]
jobid_config.pru = overfitting_pru[idx]
jobid_config.rep = overfitting_rep[idx]
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
test_score = test_scores[idx]
key2printstr[jobid_config.subdat.upper() + "," + jobid_config.alg.upper() + ","
+ jobid_config.pru + ",rep " + str(jobid_config.rep)] \
= get_result_str(jobid_config, val_score, test_score, best_config, subdat2config, mode="hpo")
for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
print(printstr)
def extract_roberta_overfitting_configs(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.pre = "roberta"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "mrpc", "cola", "sst2", "stsb"]
test_scores = ["73.1", "91.4/88.5", "61.4", "96", "89.5/88.7"]
subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)
jobid_config = JobID()
jobid_config.pre = "roberta"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "rte", "rte", "mrpc", "mrpc", "mrpc", "sst2",
"rte", "mrpc", "mrpc", "stsb", "sst2", "sst2",
"rte", "rte", "mrpc", "mrpc", "sst2", "sst2"]
overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "rs", "rs",
"rs", "rs", "rs", "rs", "rs", "rs",
"optuna", "optuna", "optuna", "optuna", "optuna", "optuna"]
overfitting_pru = ["None", "None", "None", "None", "None", "None", "None",
"asha", "asha", "asha", "asha", "asha", "asha",
"asha", "asha", "asha", "asha", "asha", "asha"]
overfitting_rep = [0, 1, 2, 0, 1, 2, 0,
1, 0, 2, 2, 1, 2,
1, 2, 0, 1, 1, 2]
test_scores = ["71.5", "72.3", "72.2", "90.5/87.1", "90.5/87.4", "90.5/87.2", "95.6",
"72.4", "90.7/87.4", "91.0/87.9", "89.4/88.8", "95.2", "95.7",
"72.4", "72.4", "90.8/87.4", "90.3/86.5", "95.1", "95.8"]
extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
subdat2config, test_scores)
def extract_electra_overfitting_configs(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.pre = "electra"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "qnli", "cola"]
test_scores = ["74.4", "93.2", "64.8"]
subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)
jobid_config = JobID()
jobid_config.pre = "electra"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "rte", "qnli", "cola", "qnli", "cola"]
overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "optuna"]
overfitting_pru = ["None", "None", "None", "asha", "asha", "asha"]
overfitting_rep = [0, 1, 0, 2, 0, 0]
test_scores = ["73.8", "74.3", "92.8", "64.7", "92.9", "63.6"]
extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
subdat2config, test_scores)

View File

@ -1,7 +1,5 @@
import os
from ..utils import get_wandb_azure_key
import subprocess
import wandb
import hashlib
from time import time
@ -26,12 +24,14 @@ class WandbUtils:
# https://docs.ray.io/en/master/tune/tutorials/tune-wandb.html
def __init__(self,
is_wandb_on=None,
is_wandb_on=False,
console_args=None,
jobid_config=None):
if is_wandb_on:
from ..utils import get_wandb_azure_key
wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
subprocess.run(["wandb", "login", "--relogin", wandb_key])
if wandb_key != "":
subprocess.run(["wandb", "login", "--relogin", wandb_key])
os.environ["WANDB_API_KEY"] = wandb_key
os.environ["WANDB_MODE"] = "online"
else:
@ -40,16 +40,24 @@ class WandbUtils:
def set_wandb_per_trial(self):
print("before wandb.init\n\n\n")
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=self.wandb_group_name,
name=str(WandbUtils._get_next_trial_ids()),
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None
try:
import wandb
try:
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=self.wandb_group_name,
name=str(WandbUtils._get_next_trial_ids()),
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None
except wandb.errors.UsageError as err:
print(err)
return None
except ImportError:
print("To use the wandb component in flaml.nlp, run pip install wandb==0.10.26")
@staticmethod
def _get_next_trial_ids():
@ -58,14 +66,22 @@ class WandbUtils:
return "trial_" + hash.hexdigest()[:3]
def set_wandb_per_run(self):
os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id()
self.wandb_group_name = os.environ["WANDB_RUN_GROUP"]
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=os.environ["WANDB_RUN_GROUP"],
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None
try:
import wandb
os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id()
self.wandb_group_name = os.environ["WANDB_RUN_GROUP"]
try:
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=os.environ["WANDB_RUN_GROUP"],
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None
except wandb.errors.UsageError as err:
print(err)
return None
except ImportError:
print("To use the wandb component in flaml.nlp, run pip install wandb==0.10.26")

View File

@ -8,31 +8,31 @@ from dataclasses import dataclass, field
def dataset_subdataset_name_format_check(val_str):
regex = re.compile(r"^[^:]*:[^:]*$")
if not regex.match(val_str):
if (val_str is not None) and (not regex.match(val_str)):
raise argparse.ArgumentTypeError("dataset_subdataset_name must be in the format {data_name}:{subdata_name}")
return val_str
def pretrained_model_size_format_check(val_str):
regex = re.compile(r"^[^:]*:(small|base|large|xlarge)")
if not regex.match(val_str):
if (val_str is not None) and (not regex.search(val_str)):
raise argparse.ArgumentTypeError("pretrained_model_size must be in the format {model_name}:{model_size},"
"where {model_name} is the name from huggingface.co/models, {model_size}"
"is chosen from small, base, large, xlarge")
return val_str
def load_console_args(**custom_data_args):
def load_dft_args():
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--server_name', type=str, help='server name', required=False,
choices=["tmdev", "dgx", "azureml"], default="tmdev")
arg_parser.add_argument('--algo_mode', type=str, help='hpo or grid search', required=False,
choices=["grid", "gridbert", "hpo", "hfhpo", "list_s", "list", "bestnn"], default="hpo")
choices=["grid", "hpo", "hfhpo"], default="hpo")
arg_parser.add_argument('--data_root_dir', type=str, help='data dir', required=False, default="data/")
arg_parser.add_argument('--dataset_subdataset_name', type=dataset_subdataset_name_format_check,
help='dataset and subdataset name', required=False, default=None)
arg_parser.add_argument('--space_mode', type=str, help='space mode', required=False,
choices=["gnr", "uni", "uni_test", "cus", "buni"], default="uni")
choices=["grid", "gnr", "uni", "uni_test", "cus", "buni"], default="uni")
arg_parser.add_argument('--search_alg_args_mode', type=str, help='search algorithm args mode', required=False,
choices=["dft", "exp", "cus"], default="dft")
arg_parser.add_argument('--algo_name', type=str, help='algorithm', required=False,
@ -56,25 +56,22 @@ def load_console_args(**custom_data_args):
arg_parser.add_argument('--round_idx', type=int, help='round idx for acl experiments', required=False, default=0)
arg_parser.add_argument('--seed_data', type=int, help='seed of data shuffling', required=False, default=43)
arg_parser.add_argument('--seed_transformers', type=int, help='seed of transformers', required=False, default=42)
arg_parser.add_argument('--optarg1', type=float, help='place holder for optional arg', required=False)
arg_parser.add_argument('--optarg2', type=float, help='place holder for optional arg', required=False)
args, unknown = arg_parser.parse_known_args()
for each_key in custom_data_args.keys():
if args.__contains__(each_key):
try:
check_key_format_func = globals()[each_key + "_format_check"]
check_key_format_func(custom_data_args[each_key])
except KeyError:
pass
setattr(args, each_key, custom_data_args[each_key])
return args
def get_wandb_azure_key(key_path):
key_json = json.load(open(os.path.join(key_path, "key.json"), "r"))
wandb_key = key_json["wandb_key"]
azure_key = key_json["azure_key"]
azure_container_name = key_json["container_name"]
return wandb_key, azure_key, azure_container_name
try:
key_json = json.load(open(os.path.join(key_path, "key.json"), "r"))
wandb_key = key_json["wandb_key"]
azure_key = key_json["azure_key"]
azure_container_name = key_json["container_name"]
return wandb_key, azure_key, azure_container_name
except FileNotFoundError:
print("File not found for key.json under", key_path)
return "", "", ""
def merge_dicts(dict1, dict2):

View File

@ -1,43 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"1. Electra Example"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -72,6 +72,12 @@ setuptools.setup(
],
"vw": [
"vowpalwabbit",
],
"nlp": [
"ray[tune]>=1.2.0",
"transformers",
"datasets==1.4",
"torch"
]
},
classifiers=[

View File

@ -1,75 +0,0 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
# 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
import argparse
from flaml.nlp.result_analysis.azure_utils import JobID
def create_partial_config_bestnn():
jobid_config = JobID()
# funnel xlarge
# jobid_config.mod = "bestnn"
jobid_config.spa = "uni"
# jobid_config.arg = "cus"
# jobid_config.alg = "cfo"
jobid_config.pre = "funnel"
jobid_config.presz = "xlarge"
# funnel small
# jobid_config.mod = "list"
# jobid_config.pre = "funnel"
# jobid_config.presz = "small"
# jobid_config.rep = 0
# # deberta large
# jobid_config.mod = "bestnn"
# jobid_config.spa = "uni"
# jobid_config.arg = "cus"
# jobid_config.alg = "cfo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "large"
# # deberta base
# jobid_config.mod = "hpo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "base"
# jobid_config.rep = 0
# # deberta large
# jobid_config.mod = "hpo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "large"
return jobid_config
def create_partial_config_list():
jobid_config = JobID()
jobid_config.mod = "list"
jobid_config.spa = "uni"
jobid_config.presz = "xlarge"
return jobid_config
def create_partial_config_hpo():
jobid_config = JobID()
jobid_config.mod = "hpo"
jobid_config.spa = "uni"
return jobid_config
if __name__ == "__main__":
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
arg_parser.add_argument('--azure_root_log_path', type=str,
help='root log path of blob storage', required=True, default="logs_azure/")
args = arg_parser.parse_args()
partial_config_large = create_partial_config_bestnn()
from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \
print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs
# get_result(args, partial_config_large)
# check_conflict(args, [partial_config_large])
download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")
# extract_roberta_overfitting_configs(args)

View File

@ -1,285 +0,0 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
# 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
import os
import shutil
from flaml.nlp import AutoTransformers
from flaml.nlp import AzureUtils, JobID
from flaml.nlp.utils import load_console_args
global azure_log_path
global azure_key
def get_resplit_portion(jobid_config):
if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}:
return {"source": ["train", "validation_matched"], "train": [0, 0.8], "validation": [0.8, 0.9],
"test": [0.9, 1.0]}
else:
return {"source": ["train", "validation"], "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]}
def get_preparedata_setting(args, jobid_config):
preparedata_setting = {
"server_name": args.server_name,
"data_root_path": args.data_root_dir,
"max_seq_length": 128,
"jobid_config": jobid_config,
"is_wandb_on": True
}
if jobid_config.spt == 'rspt':
preparedata_setting["resplit_portion"] = get_resplit_portion(jobid_config)
if ("albert" == jobid_config.pre and jobid_config.dat == ["squad"]) or \
("funnel" in jobid_config.pre and jobid_config.dat[0] in {"imdb", "yelp_review_full", "yelp_polarity",
"amazon_polarity", "amazon_review_multi"}):
preparedata_setting["max_seq_length"] = 512
if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
preparedata_setting["fold_name"] = ['train', 'validation_matched', 'test_matched']
return preparedata_setting
def get_autohf_settings(args, **custom_args):
autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
"num_samples": args.sample_num,
"time_budget": args.time_budget,
"ckpt_per_epoch": 1,
}
for other_attr in ["ds_config", "rep_id"]:
if hasattr(args, other_attr):
autohf_settings[other_attr] = getattr(args, other_attr)
else:
autohf_settings[other_attr] = None
if len(custom_args) > 0:
autohf_settings.update(custom_args)
return autohf_settings
def rm_home_result():
from os.path import expanduser
home = expanduser("~")
if os.path.exists(home + "/ray_results/"):
shutil.rmtree(home + "/ray_results/")
def get_best_base_config(args, jobid_config, autohf):
import copy
import re
args_small = copy.deepcopy(args)
args_small.algo_name = "optuna"
args_small.search_alg_args_mode = "dft"
args_small.algo_mode = "hpo"
args_small.space_mode = "uni"
args_small.pruner = "None"
if "funnel" not in args_small.pretrained_model_size:
args_small.algo_mode = "hpo"
else:
args_small.algo_mode = "list"
args_small.sample_num = 10000
args_small.time_budget = 3600
args_small.rep_id = 0
jobid_config_small = JobID(args_small)
if jobid_config_small.pre == "deberta":
jobid_config_small.presz = "base"
else:
jobid_config_small.presz = "small"
jobid_config_small.pre_full = re.sub("(xlarge|large|intermediate)", jobid_config_small.presz,
jobid_config_small.pre_full)
azure_utils_small = AzureUtils(
console_args=args_small,
jobid=jobid_config_small,
autohf=autohf)
preparedata_setting = get_preparedata_setting(args, jobid_config)
autohf.prepare_data(**preparedata_setting)
autohf.set_metric()
best_config = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)[0]
return best_config
def search_base_and_search_lower_lr(args, jobid_config, autohf):
best_config = get_best_base_config(args, jobid_config, autohf)
import copy
args_large = copy.deepcopy(args)
args_large.time_budget = args.time_budget - 3600
args_large.sample_num = 100000
args_large.algo_name = args.algo_name
args_large.search_alg_args_mode = "cus"
args_large.space_mode = "buni"
args_large.pruner = "None"
jobid_config_large = JobID(args_large)
jobid_config_large.presz = jobid_config.presz
jobid_config_large.pre_full = jobid_config.pre_full
azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)
_test_hpo(args_large,
jobid_config_large,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config],
"bound": {"learning_rate": {
"u": best_config["learning_rate"]}}}))
def search_base_and_search_around_best(args, jobid_config, autohf):
args.algo_name = "bs"
args.search_alg_args_mode = "dft"
args.spa = "uni"
args.pru = "None"
best_config = get_best_base_config(args, jobid_config, autohf)
import copy
args_large = copy.deepcopy(args)
args_large.time_budget = args.time_budget - 3600
args_large.sample_num = 100000
args_large.algo_name = "cfo"
args_large.search_alg_args_mode = "cus"
args_large.space_mode = "uni"
jobid_config_large = JobID(args_large)
jobid_config_large.presz = jobid_config.presz
jobid_config_large.pre_full = jobid_config.pre_full
azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)
_test_hpo(args_large,
jobid_config_large,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config]}))
def evaluate_configs(autohf, args, ranked_all_configs):
import copy
this_args = copy.deepcopy(args)
this_args.time_budget = 100000
this_args.sample_num = int(len(ranked_all_configs))
this_args.search_alg_args_mode = "cus"
jobid_config = JobID(this_args)
azure_utils_large = AzureUtils(console_args=this_args, jobid=jobid_config, autohf=autohf)
_test_hpo(this_args,
jobid_config,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(this_args, **{"points_to_evaluate": ranked_all_configs}))
def convert_config_to_different_size(origin_config, mode):
import re
import copy
if mode == "small":
new_config = copy.deepcopy(origin_config)
if new_config.pre == "funnel":
new_config.mod = "list"
else:
new_config.mod = "hpo"
if new_config.pre == "funnel":
new_config.presz = "small"
else:
new_config.presz = "base"
new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
elif mode == "large":
new_config = copy.deepcopy(origin_config)
new_config.mod = "hpo"
if new_config.pre == "funnel":
new_config.presz = "xlarge"
new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full)
else:
new_config.presz = "large"
new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full)
return new_config
def evaluate_small_best_configs_on_large(large_args, autohf):
jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
jobid_config_small.rep = 0
azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
evaluate_configs(large_args, ranked_all_small_configs[:int(len(ranked_all_small_configs) / 2)])
def add_dict_item_to_list(this_list, this_dict):
is_exist = len([x for x in this_list if x == this_dict]) > 0
if not is_exist:
this_list.append(this_dict)
return this_list
def evaluate_large_best_configs_on_small(small_args, autohf):
jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large")
autohf.jobid_config = jobid_config_large
autohf.set_metric()
all_configs_from_large = []
for rep_id in range(3):
jobid_config_large.rep = rep_id
azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf)
ranked_all_large_configs = azure_utils_large.get_ranked_configs(autohf.metric_mode_name)
for each_config in ranked_all_large_configs:
all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)
jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small")
jobid_config_small.rep = 0
azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf)
ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
for each_config in ranked_all_small_configs:
all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)
evaluate_configs(autohf, small_args, list(all_configs_from_large))
def _test_hpo(args,
jobid_config,
autohf,
azure_utils=None,
autohf_settings=None,
):
try:
if not azure_utils:
azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
preparedata_setting = get_preparedata_setting(args, jobid_config)
autohf.prepare_data(**preparedata_setting)
analysis = validation_metric = test_metric = None
if not autohf_settings:
autohf_settings = get_autohf_settings(args)
if args.algo_mode != "hfhpo":
validation_metric, analysis = autohf.fit(**autohf_settings, )
else:
autohf.fit_hf(**autohf_settings)
if jobid_config.spt == "ori":
predictions, test_metric = autohf.predict()
if validation_metric:
test_metric.update({"validation": validation_metric})
else:
predictions = None
if test_metric:
validation_metric.update({"test": test_metric})
if analysis is not None:
json_log = azure_utils.extract_log_from_analysis(analysis)
else:
json_log = None
azure_utils.write_autohf_output(json_log=json_log,
valid_metric=validation_metric,
predictions=predictions,
duration=autohf.last_run_duration)
except AssertionError:
azure_utils.write_exception()
rm_home_result()
if __name__ == "__main__":
autohf = AutoTransformers()
args = load_console_args()
jobid_config = JobID(args)
if args.algo_mode in ("hpo", "hfhpo", "grid", "gridbert"):
_test_hpo(args, jobid_config, autohf)
elif args.algo_mode == "bestnn":
search_base_and_search_lower_lr(args, jobid_config, autohf)
elif args.algo_mode == "list":
evaluate_small_best_configs_on_large(args, autohf)
elif args.algo_mode == "list_s":
evaluate_large_best_configs_on_small(args, autohf)

126
test/hf/test_cover_azure.py Normal file
View File

@ -0,0 +1,126 @@
"""
test suites for covering azure_utils.py
"""
def get_preparedata_setting(jobid_config):
preparedata_setting = {
"server_name": "tmdev",
"data_root_path": "data/",
"max_seq_length": 128,
"jobid_config": jobid_config,
"resplit_portion": {"source": ["train", "validation"],
"train": [0, 0.8],
"validation": [0.8, 0.9],
"test": [0.9, 1.0]}
}
return preparedata_setting
def get_console_args():
from flaml.nlp.utils import load_dft_args
args = load_dft_args()
args.dataset_subdataset_name = "glue:mrpc"
args.algo_mode = "hpo"
args.space_mode = "uni"
args.search_alg_args_mode = "dft"
args.algo_name = "bs"
args.pruner = "None"
args.pretrained_model_size = "google/electra-base-discriminator:base"
args.resplit_mode = "rspt"
args.rep_id = 0
args.seed_data = 43
args.seed_transformers = 42
return args
def test_get_configblob_from_partial_jobid():
try:
import ray
except ImportError:
return
from flaml.nlp.result_analysis.azure_utils import JobID
each_blob_name = "dat=glue_subdat=cola_mod=grid_spa=cus_arg=dft_alg=grid" \
"_pru=None_pre=deberta_presz=large_spt=rspt_rep=0_sddt=43" \
"_sdhf=42_var1=1e-05_var2=0.0.json"
partial_jobid = JobID()
partial_jobid.pre = "deberta"
partial_jobid.mod = "grid"
partial_jobid.spa = "cus"
partial_jobid.presz = "large"
each_jobconfig = JobID.convert_blobname_to_jobid(each_blob_name)
each_jobconfig.is_match(partial_jobid)
partial_jobid = JobID()
partial_jobid.pre = "deberta"
partial_jobid.mod = "hpo"
partial_jobid.spa = "cus"
partial_jobid.presz = "large"
partial_jobid.sddt = None
each_jobconfig = JobID.convert_blobname_to_jobid(each_blob_name)
each_jobconfig.is_match(partial_jobid)
def test_jobid():
try:
import ray
except ImportError:
return
from flaml.nlp.result_analysis.azure_utils import JobID
args = get_console_args()
jobid_config = JobID(args)
jobid_config.to_partial_jobid_string()
JobID.convert_blobname_to_jobid("test")
JobID.dataset_list_to_str("glue")
JobID.get_full_data_name(["glue"], "mrpc")
JobID._extract_model_type_with_keywords_match("google/electra-base-discriminator:base")
jobid_config.to_wandb_string()
def test_azureutils():
try:
import ray
except ImportError:
return
from flaml.nlp.result_analysis.azure_utils import AzureUtils, ConfigScore, JobID, ConfigScoreList
from flaml.nlp import AutoTransformers
args = get_console_args()
args.key_path = "."
jobid_config = JobID(args)
autohf = AutoTransformers()
autohf.jobid_config = jobid_config
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
each_configscore = ConfigScore(trial_id="test", start_time=0.0, last_update_time=0.0,
config={}, metric_score={"max": 0.0}, time_stamp=0.0)
configscore_list = ConfigScoreList([each_configscore])
for each_method in ["unsorted", "sort_time", "sort_accuracy"]:
configscore_list.sorted(each_method)
configscore_list.get_best_config()
azureutils = AzureUtils(console_args=args, autohf=autohf)
azureutils.autohf = autohf
azureutils.root_log_path = "logs_azure/"
azureutils.write_autohf_output(configscore_list=[each_configscore],
valid_metric={},
predictions=[],
duration=0)
azureutils.get_config_and_score_from_partial_jobid(root_log_path="data/", partial_jobid=jobid_config)
if __name__ == "__main__":
test_get_configblob_from_partial_jobid()
test_jobid()
test_azureutils()

214
test/hf/test_cover_other.py Normal file
View File

@ -0,0 +1,214 @@
"""
test suites for covering other functions
"""
from transformers import AutoConfig
from flaml.nlp.huggingface.trainer import TrainerForAutoTransformers
def get_console_args():
from flaml.nlp.utils import load_dft_args
args = load_dft_args()
args.dataset_subdataset_name = "glue:mrpc"
args.algo_mode = "hpo"
args.space_mode = "uni"
args.search_alg_args_mode = "dft"
args.algo_name = "bs"
args.pruner = "None"
args.pretrained_model_size = "google/electra-base-discriminator:base"
args.resplit_mode = "rspt"
args.rep_id = 0
args.seed_data = 43
args.seed_transformers = 42
return args
def model_init():
from flaml.nlp.result_analysis.azure_utils import JobID
jobid_config = JobID()
jobid_config.set_unittest_config()
from flaml.nlp import AutoTransformers
autohf = AutoTransformers()
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
return autohf._load_model()
def get_preparedata_setting(jobid_config):
preparedata_setting = {
"server_name": "tmdev",
"data_root_path": "data/",
"max_seq_length": 128,
"jobid_config": jobid_config,
"resplit_portion": {"source": ["train", "validation"],
"train": [0, 0.8],
"validation": [0.8, 0.9],
"test": [0.9, 1.0]}
}
return preparedata_setting
def test_dataprocess():
"""
test to increase the coverage for flaml.nlp.dataprocess_auto
"""
try:
import ray
except ImportError:
return
from flaml.nlp import AutoTransformers
from flaml.nlp import JobID
from flaml.nlp.dataset.dataprocess_auto import TOKENIZER_MAPPING
jobid_config = JobID()
jobid_config.set_unittest_config()
autohf = AutoTransformers()
dataset_name = JobID.dataset_list_to_str(jobid_config.dat)
default_func = TOKENIZER_MAPPING[(dataset_name, jobid_config.subdat)]
funcs_to_eval = set([(dat, subdat) for (dat, subdat) in TOKENIZER_MAPPING.keys()
if TOKENIZER_MAPPING[(dat, subdat)] != default_func])
for (dat, subdat) in funcs_to_eval:
print("loading dataset for {}, {}".format(dat, subdat))
jobid_config.dat = dat.split(",")
jobid_config.subdat = subdat
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
def test_gridsearch_space():
try:
import ray
except ImportError:
return
from flaml.nlp.hpo.grid_searchspace_auto import GRID_SEARCH_SPACE_MAPPING, AutoGridSearchSpace
from flaml.nlp.result_analysis.azure_utils import JobID
jobid_config = JobID()
jobid_config.set_unittest_config()
for each_model_type in GRID_SEARCH_SPACE_MAPPING.keys():
AutoGridSearchSpace.from_model_and_dataset_name(
each_model_type,
"base",
jobid_config.dat,
jobid_config.subdat, "hpo")
def test_hpo_space():
try:
import ray
except ImportError:
return
from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace, HPO_SEARCH_SPACE_MAPPING
from flaml.nlp.result_analysis.azure_utils import JobID
jobid_config = JobID()
jobid_config.set_unittest_config()
for spa in HPO_SEARCH_SPACE_MAPPING.keys():
jobid_config.spa = spa
if jobid_config.spa == "cus":
custom_hpo_args = {"hpo_space": {"learning_rate": [1e-5]}}
elif jobid_config.spa == "buni":
best_config = {"learning_rate": 1e-5}
custom_hpo_args = {"points_to_evaluate": [best_config],
"bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
else:
custom_hpo_args = {}
AutoHPOSearchSpace.from_model_and_dataset_name(jobid_config.spa, jobid_config.pre, jobid_config.presz,
jobid_config.dat, jobid_config.subdat, **custom_hpo_args)
def test_trainer():
try:
import ray
except ImportError:
return
num_train_epochs = 3
num_train_examples = 100
per_device_train_batch_size = 32
device_count = 1
max_steps = 1000
warmup_steps = 100
warmup_ratio = 0.1
trainer = TrainerForAutoTransformers(model_init=model_init)
trainer.convert_num_train_epochs_to_max_steps(num_train_epochs,
num_train_examples,
per_device_train_batch_size,
device_count)
trainer.convert_max_steps_to_num_train_epochs(max_steps,
num_train_examples,
per_device_train_batch_size,
device_count)
trainer.convert_warmup_ratio_to_warmup_steps(warmup_ratio,
max_steps=max_steps,
num_train_epochs=num_train_epochs,
num_train_examples=num_train_examples,
per_device_train_batch_size=per_device_train_batch_size,
device_count=device_count)
trainer.convert_warmup_steps_to_warmup_ratio(warmup_steps,
num_train_epochs,
num_train_examples,
per_device_train_batch_size,
device_count)
def test_switch_head():
try:
import ray
except ImportError:
return
from flaml.nlp.huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
from flaml.nlp.result_analysis.azure_utils import JobID
jobid_config = JobID()
jobid_config.set_unittest_config()
checkpoint_path = jobid_config.pre_full
model_config = AutoConfig.from_pretrained(
checkpoint_path,
num_labels=AutoConfig.from_pretrained(checkpoint_path).num_labels)
for model in list(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()):
jobid_config.pre = model
AutoSeqClassificationHead \
.from_model_type_and_config(jobid_config.pre,
model_config)
def test_wandb_utils():
try:
import ray
except ImportError:
return
from flaml.nlp.result_analysis.wandb_utils import WandbUtils
from flaml.nlp.result_analysis.azure_utils import JobID
import os
args = get_console_args()
args.key_path = "."
jobid_config = JobID(args)
wandb_utils = WandbUtils(is_wandb_on=True, console_args=args, jobid_config=jobid_config)
os.environ["WANDB_MODE"] = "online"
wandb_utils.wandb_group_name = "test"
wandb_utils._get_next_trial_ids()
wandb_utils.set_wandb_per_run()
if __name__ == "__main__":
test_wandb_utils()
test_dataprocess()
test_gridsearch_space()
test_hpo_space()
test_trainer()
test_switch_head()

View File

@ -1,7 +1,5 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
# 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
global azure_log_path
global azure_key
@ -26,6 +24,7 @@ def get_autohf_settings():
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
"ray_local_mode": True
}
return autohf_settings
@ -38,24 +37,38 @@ def test_hpo():
from flaml.nlp import AutoTransformers
from flaml.nlp import JobID
from flaml.nlp import AzureUtils
jobid_config = JobID()
jobid_config.set_unittest_config()
autohf = AutoTransformers()
try:
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
autohf_settings = get_autohf_settings()
validation_metric, analysis = autohf.fit(**autohf_settings, )
autohf_settings = get_autohf_settings()
autohf_settings["points_to_evaluate"] = [{"learning_rate": 2e-5}]
validation_metric, analysis = autohf.fit(**autohf_settings)
predictions, test_metric = autohf.predict()
if test_metric:
validation_metric.update({"test": test_metric})
predictions, test_metric = autohf.predict()
if test_metric:
validation_metric.update({"test": test_metric})
except AssertionError:
pass
azure_utils = AzureUtils(root_log_path="logs_test/", autohf=autohf)
azure_utils._azure_key = "test"
azure_utils._container_name = "test"
configscore_list = azure_utils.extract_configscore_list_from_analysis(analysis)
azure_utils.write_autohf_output(configscore_list=configscore_list,
valid_metric=validation_metric,
predictions=predictions,
duration=autohf.last_run_duration)
jobid_config.mod = "grid"
autohf = AutoTransformers()
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
if __name__ == "__main__":