Mirror of https://github.com/microsoft/autogen.git

commit 549a0dfb53 (parent 6c66cd67f7)

limit time and memory consumption (#264)

* limit time and memory
* separate tests
* lrl1 can't be limited by limit_resource
* free memory when possible
* passthrough=False when ensemble fails; retrain when trained_estimator is None
* use callback for resource limit
* handle lower version of xgb with no callback
* free mem ratio
* reduce verbosity
* retrain_final when max_iter==1
* remove trained_estimator from result
* model_history
* wheel
* retrain time as best_config_train_time
* ci: libomp version for xgboost on macos
* limit_resource not working in windows
* test pickle load
* mute forecaster
* notebook update
* check hard
* preventive callback
* add use_ray
@@ -24,9 +24,11 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
-    - name: If mac, install libomp to facilitate lgbm install
+    - name: If mac, install libomp to facilitate lgbm and xgboost install
       if: matrix.os == 'macOS-latest'
       run: |
+        # remove libomp version constraint after xgboost works with libomp>11.1.0
+        wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
         brew install libomp
         export CC=/usr/bin/clang
         export CXX=/usr/bin/clang++
@@ -36,7 +38,7 @@ jobs:
         export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
     - name: Install packages and dependencies
       run: |
-        python -m pip install --upgrade pip
+        python -m pip install --upgrade pip wheel
         pip install -e .[test]
     - name: If linux or mac, install ray
      if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9'
flaml/automl.py (130 changed lines)
@@ -248,7 +248,7 @@ class AutoMLState:
             "wall_clock_time": time.time() - self._start_time_flag,
             "metric_for_logging": metric_for_logging,
             "val_loss": val_loss,
-            "trained_estimator": trained_estimator,
+            "trained_estimator": trained_estimator if self.save_model_history else None,
         }
         if sampled_weight is not None:
             self.fit_kwargs["sample_weight"] = weight
@@ -403,9 +403,10 @@ class AutoML:

     @property
     def best_config_train_time(self):
-        """A float of the seconds taken by training the
-        best config."""
-        return self._search_states[self._best_estimator].best_config_train_time
+        """A float of the seconds taken by training the best config."""
+        return getattr(
+            self._search_states[self._best_estimator], "best_config_train_time", None
+        )

     @property
     def classes_(self):
@@ -529,8 +530,9 @@ class AutoML:
             self._nrow, self._ndim = X_train_all.shape
             if self._state.task == TS_FORECAST:
                 X_train_all = pd.DataFrame(X_train_all)
-                assert X_train_all[X_train_all.columns[0]].dtype.name == 'datetime64[ns]', (
-                    f"For '{TS_FORECAST}' task, the first column must contain timestamp values.")
+                assert (
+                    X_train_all[X_train_all.columns[0]].dtype.name == "datetime64[ns]"
+                ), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
             X, y = X_train_all, y_train_all
         elif dataframe is not None and label is not None:
             assert isinstance(
@@ -539,8 +541,9 @@ class AutoML:
             assert label in dataframe.columns, "label must a column name in dataframe"
             self._df = True
             if self._state.task == TS_FORECAST:
-                assert dataframe[dataframe.columns[0]].dtype.name == 'datetime64[ns]', (
-                    f"For '{TS_FORECAST}' task, the first column must contain timestamp values.")
+                assert (
+                    dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]"
+                ), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
             X = dataframe.drop(columns=label)
             self._nrow, self._ndim = X.shape
             y = dataframe[label]
@@ -584,7 +587,9 @@ class AutoML:
             else:
                 self._state.X_val = X_val
             if self._label_transformer:
-                self._state.y_val = self._label_transformer.transform(y_val, self._state.task)
+                self._state.y_val = self._label_transformer.transform(
+                    y_val, self._state.task
+                )
             else:
                 self._state.y_val = y_val
         else:
@@ -1064,7 +1069,8 @@ class AutoML:
             return "holdout"
         nrow, dim = self._nrow, self._ndim
         if (
-            nrow * dim / 0.9 < SMALL_LARGE_THRES * (time_budget / 3600)
+            time_budget is None
+            or nrow * dim / 0.9 < SMALL_LARGE_THRES * (time_budget / 3600)
             and nrow < CV_HOLDOUT_THRESHOLD
         ):
             # time allows or sampling can be used and cv is necessary
@@ -1301,6 +1307,7 @@ class AutoML:
         append_log=False,
         auto_augment=True,
         min_sample_size=MIN_SAMPLE_TRAIN,
+        use_ray=False,
         **fit_kwargs,
     ):
         """Find a model for a given task
@@ -1414,7 +1421,9 @@ class AutoML:
                 In the following code example, we get starting_points from the
                 automl_experiment and use them in the new_automl_experiment.
                 e.g.,
+
+        .. code-block:: python

                     from flaml import AutoML
                     automl_experiment = AutoML()
                     X_train, y_train = load_iris(return_X_y=True)
@@ -1440,6 +1449,10 @@ class AutoML:
                 augment rare classes.
             min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
                 size when sample=True.
+            use_ray: boolean, default=False | Whether to use ray to run the training
+                in separate processes. This can be used to prevent OOM for large
+                datasets, but will incur more overhead in time. Only use it if
+                you run into OOM failures.
             **fit_kwargs: Other key word arguments to pass to fit() function of
                 the searched learners, such as sample_weight. Include period as
                 a key word argument for 'ts_forecast' task.
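Below is a minimal usage sketch of the new `use_ray` option; the dataset and budget values are illustrative only and are not part of this commit.

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X_train, y_train = load_iris(return_X_y=True)
automl = AutoML()
# Run trials in separate Ray processes to guard against OOM
# (requires `pip install flaml[ray]`); values below are illustrative.
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    time_budget=10,
    use_ray=True,
)
print(automl.best_estimator, automl.best_config_train_time)
```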
@@ -1483,8 +1496,10 @@ class AutoML:
         )
         self._retrain_final = (
             retrain_full is True
-            and (eval_method == "holdout" and self._state.X_val is None)
-            or (eval_method == "cv")
+            and eval_method == "holdout"
+            and self._state.X_val is None
+            or eval_method == "cv"
+            or max_iter == 1
         )
         self._auto_augment = auto_augment
         self._min_sample_size = min_sample_size
@@ -1564,7 +1579,7 @@ class AutoML:
         logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
         self.estimator_list = estimator_list
         self._hpo_method = hpo_method or ("cfo" if n_concurrent_trials == 1 else "bs")
-        self._state.time_budget = time_budget
+        self._state.time_budget = time_budget or 1e10
         self._active_estimators = estimator_list.copy()
         self._ensemble = ensemble
         self._max_iter = max_iter
@@ -1573,10 +1588,11 @@ class AutoML:
         self._state.train_time_limit = train_time_limit
         self._log_type = log_type
         self.split_ratio = split_ratio
-        self._save_model_history = model_history
+        self._state.save_model_history = model_history
         self._state.n_jobs = n_jobs
         self._n_concurrent_trials = n_concurrent_trials
         self._early_stop = early_stop
+        self._use_ray = use_ray or self._n_concurrent_trials > 1
         if log_file_name:
             with training_log_writer(log_file_name, append_log) as save_helper:
                 self._training_log = save_helper
@@ -1627,7 +1643,7 @@ class AutoML:
                 from ray.tune.suggest import ConcurrencyLimiter
             except (ImportError, AssertionError):
                 raise ImportError(
-                    "n_concurrent_trial > 1 requires installation of ray. "
+                    "n_concurrent_trial>1 or use_ray=True requires installation of ray. "
                     "Please run pip install flaml[ray]"
                 )
             if self._hpo_method in ("cfo", "grid"):
@@ -1693,7 +1709,8 @@ class AutoML:
             resources_per_trial=resources_per_trial,
             time_budget_s=self._state.time_budget,
             num_samples=self._max_iter,
-            verbose=self.verbose,
+            verbose=max(self.verbose - 3, 0),
+            raise_on_failed_trial=False,
         )
         # logger.info([trial.last_result for trial in analysis.trials])
         trials = sorted(
@@ -1712,7 +1729,7 @@ class AutoML:
                 config = result["config"]
                 estimator = config.get("ml", config)["learner"]
                 search_state = self._search_states[estimator]
-                search_state.update(result, 0, self._save_model_history)
+                search_state.update(result, 0, self._state.save_model_history)
                 if result["wall_clock_time"] is not None:
                     self._state.time_from_start = result["wall_clock_time"]
                 if search_state.sample_size == self._state.data_size:
@@ -1727,7 +1744,7 @@ class AutoML:
                         config,
                         self._time_taken_best_iter,
                     )
-                    if self._save_model_history:
+                    if self._state.save_model_history:
                         self._model_history[
                             _track_iter
                         ] = search_state.trained_estimator
@@ -1902,7 +1919,7 @@ class AutoML:
                 search_state.update(
                     result,
                     time_used=time_used,
-                    save_model_history=self._save_model_history,
+                    save_model_history=self._state.save_model_history,
                 )
                 if self._estimator_index is None:
                     # update init eci estimate
@@ -1945,18 +1962,27 @@ class AutoML:
                         search_state.best_config,
                         self._state.time_from_start,
                     )
-                    if self._save_model_history:
+                    if self._state.save_model_history:
                         self._model_history[
                             self._track_iter
                         ] = search_state.trained_estimator
                     elif self._trained_estimator:
                         del self._trained_estimator
                         self._trained_estimator = None
                     if not self._retrain_final:
                         self._trained_estimator = search_state.trained_estimator
                     self._best_iteration = self._track_iter
                     self._time_taken_best_iter = self._state.time_from_start
                     better = True
                     next_trial_time = search_state.time2eval_best
+            if search_state.trained_estimator and not (
+                self._state.save_model_history or self._ensemble
+            ):
+                # free RAM
+                if search_state.trained_estimator != self._trained_estimator:
+                    search_state.trained_estimator.cleanup()
+                del search_state.trained_estimator
+                search_state.trained_estimator = None
             if better or self._log_type == "all":
                 if self._training_log:
                     self._training_log.append(
@@ -2049,7 +2075,9 @@ class AutoML:
                 logger.info(
                     "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)
                 )
-                self._retrained_config[best_config_sig] = retrain_time
+                self._retrained_config[
+                    best_config_sig
+                ] = state.best_config_train_time = retrain_time
                 est_retrain_time = 0
         self._state.time_from_start = time.time() - self._start_time_flag
         if (
@@ -2083,7 +2111,7 @@ class AutoML:
         self._selected = None
         self.modelcount = 0

-        if self._n_concurrent_trials == 1:
+        if not self._use_ray:
             self._search_sequential()
         else:
             self._search_parallel()
@@ -2103,12 +2131,29 @@ class AutoML:
                 "regression",
             ):
                 search_states = list(
-                    x for x in self._search_states.items() if x[1].trained_estimator
+                    x for x in self._search_states.items() if x[1].best_config
                 )
                 search_states.sort(key=lambda x: x[1].best_loss)
-                estimators = [(x[0], x[1].trained_estimator) for x in search_states[:2]]
+                estimators = [
+                    (
+                        x[0],
+                        x[1].learner_class(
+                            task=self._state.task,
+                            n_jobs=self._state.n_jobs,
+                            **x[1].best_config,
+                        ),
+                    )
+                    for x in search_states[:2]
+                ]
                 estimators += [
-                    (x[0], x[1].trained_estimator)
+                    (
+                        x[0],
+                        x[1].learner_class(
+                            task=self._state.task,
+                            n_jobs=self._state.n_jobs,
+                            **x[1].best_config,
+                        ),
+                    )
                     for x in search_states[2:]
                     if x[1].best_loss < 4 * self._selected.best_loss
                 ]
@@ -2135,19 +2180,49 @@ class AutoML:
                 )
                 if self._sample_weight_full is not None:
                     self._state.fit_kwargs["sample_weight"] = self._sample_weight_full
+                for e in estimators:
+                    e[1].__class__.init()
+                try:
                     stacker.fit(
                         self._X_train_all, self._y_train_all, **self._state.fit_kwargs
                     )
                     logger.info(f"ensemble: {stacker}")
                     self._trained_estimator = stacker
                     self._trained_estimator.model = stacker
+                except ValueError as e:
+                    if passthrough:
+                        logger.warning(
+                            "Using passthrough=False for ensemble because the data contain categorical features."
+                        )
+                        stacker = Stacker(
+                            estimators,
+                            final_estimator,
+                            n_jobs=self._state.n_jobs,
+                            passthrough=False,
+                        )
+                        stacker.fit(
+                            self._X_train_all,
+                            self._y_train_all,
+                            **self._state.fit_kwargs,
+                        )
+                        logger.info(f"ensemble: {stacker}")
+                        self._trained_estimator = stacker
+                        self._trained_estimator.model = stacker
+                    else:
+                        raise e
             elif self._retrain_final:
                 # reset time budget for retraining
+                if self._max_iter > 1:
                     self._state.time_from_start -= self._state.time_budget
-                if self._state.task == TS_FORECAST or (
+                if (
+                    self._state.task == TS_FORECAST
+                    or self._trained_estimator is None
+                    or (
                         self._state.time_budget - self._state.time_from_start
                         > self._selected.est_retrain_time(self.data_size_full)
-                    and self._selected.best_config_sample_size == self._state.data_size
+                        and self._selected.best_config_sample_size
+                        == self._state.data_size
+                    )
                 ):
                     state = self._search_states[self._best_estimator]
                     (
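For context, a minimal standalone sketch of what the `passthrough=False` fallback above achieves; the base learners and data here are illustrative, not FLAML's internals.

```python
from sklearn.datasets import load_iris
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
# With passthrough=False the final estimator only sees the base learners'
# predictions, not the raw features, so feature types the final estimator
# cannot handle no longer make the ensemble fit raise a ValueError.
stacker = StackingClassifier(
    estimators=[("tree", DecisionTreeClassifier())],
    final_estimator=LogisticRegression(),
    passthrough=False,
    n_jobs=1,
)
stacker.fit(X, y)
```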
@@ -2163,6 +2238,7 @@ class AutoML:
                             self._best_estimator, retrain_time
                         )
                     )
+                    state.best_config_train_time = retrain_time
                     if self._trained_estimator:
                         logger.info(f"retrained model: {self._trained_estimator.model}")
                 else:
@@ -275,7 +275,6 @@ class DataTransformer:
                     X[column] = X[column].map(datetime.toordinal)
                     datetime_columns.append(column)
                     del tmp_dt
                 else:
-                    X[column] = X[column].fillna(np.nan)
                     num_columns.append(column)
             X = X[cat_columns + num_columns]
flaml/model.py (419 changed lines)
@@ -2,20 +2,67 @@
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 """

+from contextlib import contextmanager
+from functools import partial
+import signal
+import os
+from typing import Callable, List
 import numpy as np
 import time
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
 from sklearn.linear_model import LogisticRegression
+from sklearn.dummy import DummyClassifier, DummyRegressor
 from scipy.sparse import issparse
 import pandas as pd
-from . import tune
-from .data import group_counts, CLASSIFICATION, TS_FORECAST, TS_TIMESTAMP_COL, TS_VALUE_COL
-
 import logging
+from . import tune
+from .data import (
+    group_counts,
+    CLASSIFICATION,
+    TS_FORECAST,
+    TS_TIMESTAMP_COL,
+    TS_VALUE_COL,
+)
+
+try:
+    import psutil
+except ImportError:
+    psutil = None
+try:
+    import resource
+except ImportError:
+    resource = None
+
+logger = logging.getLogger("flaml.automl")
+FREE_MEM_RATIO = 0.2
+
+
+def TimeoutHandler(sig, frame):
+    raise TimeoutError(sig, frame)
+
+
+@contextmanager
+def limit_resource(memory_limit, time_limit):
+    if memory_limit > 0:
+        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
+        if soft < 0 and (hard < 0 or memory_limit <= hard) or memory_limit < soft:
+            resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
+    main_thread = False
+    if time_limit is not None:
+        try:
+            signal.signal(signal.SIGALRM, TimeoutHandler)
+            signal.alarm(int(time_limit) or 1)
+            main_thread = True
+        except ValueError:
+            pass
+    try:
+        yield
+    finally:
+        if main_thread:
+            signal.alarm(0)
+        if memory_limit > 0:
+            resource.setrlimit(resource.RLIMIT_AS, (soft, hard))
+
+
 class BaseEstimator:
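A minimal sketch of how the `limit_resource` context manager above behaves; it assumes a POSIX system (the `resource` module and `SIGALRM` are available) and the main thread, and the helper function here is hypothetical.

```python
import time

def slow_training_job():
    # Hypothetical stand-in for an estimator's fit call.
    time.sleep(10)

try:
    # Cap address space at roughly 2 GB and wall-clock time at 1 second;
    # the SIGALRM handler raises TimeoutError when the alarm fires.
    with limit_resource(2 * 1024 ** 3, 1):
        slow_training_job()
except (MemoryError, TimeoutError) as e:
    print("training aborted:", e)
```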
@@ -112,7 +159,35 @@ class BaseEstimator:
         Returns:
             train_time: A float of the training time in seconds
         """
-        return self._fit(X_train, y_train, **kwargs)
+        if (
+            getattr(self, "limit_resource", None)
+            and resource is not None
+            and (budget is not None or psutil is not None)
+        ):
+            start_time = time.time()
+            mem = psutil.virtual_memory() if psutil is not None else None
+            try:
+                with limit_resource(
+                    mem.available * (1 - FREE_MEM_RATIO)
+                    + psutil.Process(os.getpid()).memory_info().rss
+                    if mem is not None
+                    else -1,
+                    budget,
+                ):
+                    train_time = self._fit(X_train, y_train, **kwargs)
+            except (MemoryError, TimeoutError) as e:
+                logger.warning(f"{e.__class__} {e}")
+                if self._task in CLASSIFICATION:
+                    model = DummyClassifier()
+                else:
+                    model = DummyRegressor()
+                X_train = self._preprocess(X_train)
+                model.fit(X_train, y_train)
+                self._model = model
+                train_time = time.time() - start_time
+        else:
+            train_time = self._fit(X_train, y_train, **kwargs)
+        return train_time

     def predict(self, X_test):
         """Predict label from features
@@ -223,6 +298,9 @@ class SKLearnEstimator(BaseEstimator):


 class LGBMEstimator(BaseEstimator):
+    ITER_HP = "n_estimators"
+    HAS_CALLBACK = True
+
     @classmethod
     def search_space(cls, data_size, **params):
         upper = min(32768, int(data_size))
@@ -297,6 +375,8 @@ class LGBMEstimator(BaseEstimator):
             self.estimator_class = LGBMClassifier
         self._time_per_iter = None
         self._train_size = 0
+        self._mem_per_iter = 1
+        self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0) is not None

     def _preprocess(self, X):
         if (
@@ -316,50 +396,111 @@ class LGBMEstimator(BaseEstimator):

     def fit(self, X_train, y_train, budget=None, **kwargs):
         start_time = time.time()
-        n_iter = self.params["n_estimators"]
+        deadline = start_time + budget if budget else np.inf
+        n_iter = self.params[self.ITER_HP]
         trained = False
+        if not self.HAS_CALLBACK:
+            mem0 = psutil.virtual_memory().available if psutil is not None else 1
             if (
-            (not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4)
+                (
+                    not self._time_per_iter
+                    or abs(self._train_size - X_train.shape[0]) > 4
+                )
                 and budget is not None
-            and n_iter > 1
-        ):
-            self.params["n_estimators"] = 1
+                or self._mem_per_iter <= 1
+                and psutil is not None
+            ) and n_iter > 1:
+                self.params[self.ITER_HP] = 1
                 self._t1 = self._fit(X_train, y_train, **kwargs)
-            if self._t1 >= budget or n_iter == 1:
-                # self.params["n_estimators"] = n_iter
+                if budget is not None and self._t1 >= budget or n_iter == 1:
+                    # self.params[self.ITER_HP] = n_iter
                     return self._t1
-            self.params["n_estimators"] = min(n_iter, 4)
+                mem1 = psutil.virtual_memory().available if psutil is not None else 1
+                self._mem1 = mem0 - mem1
+                self.params[self.ITER_HP] = min(n_iter, 4)
                 self._t2 = self._fit(X_train, y_train, **kwargs)
+                mem2 = psutil.virtual_memory().available if psutil is not None else 1
+                self._mem2 = max(mem0 - mem2, self._mem1)
+                # if self._mem1 <= 0:
+                #     self._mem_per_iter = self._mem2 / (self.params[self.ITER_HP] + 1)
+                # elif self._mem2 <= 0:
+                #     self._mem_per_iter = self._mem1
+                # else:
+                self._mem_per_iter = min(
+                    self._mem1, self._mem2 / self.params[self.ITER_HP]
+                )
+                if self._mem_per_iter <= 1 and psutil is not None:
+                    n_iter = self.params[self.ITER_HP]
                 self._time_per_iter = (
-                (self._t2 - self._t1) / (self.params["n_estimators"] - 1)
+                    (self._t2 - self._t1) / (self.params[self.ITER_HP] - 1)
                     if self._t2 > self._t1
                     else self._t1
                     if self._t1
                     else 0.001
                 )
                 self._train_size = X_train.shape[0]
-            if self._t1 + self._t2 >= budget or n_iter == self.params["n_estimators"]:
-                # self.params["n_estimators"] = n_iter
+                if (
+                    budget is not None
+                    and self._t1 + self._t2 >= budget
+                    or n_iter == self.params[self.ITER_HP]
+                ):
+                    # self.params[self.ITER_HP] = n_iter
                     return time.time() - start_time
                 trained = True
-        if budget is not None and n_iter > 1:
+            # logger.debug(mem0)
+            # logger.debug(self._mem_per_iter)
+            if n_iter > 1:
                 max_iter = min(
                     n_iter,
                     int(
-                    (budget - time.time() + start_time - self._t1) / self._time_per_iter
+                        (budget - time.time() + start_time - self._t1)
+                        / self._time_per_iter
                         + 1
-                ),
-            )
-            if trained and max_iter <= self.params["n_estimators"]:
+                    )
+                    if budget is not None
+                    else n_iter,
+                    int((1 - FREE_MEM_RATIO) * mem0 / self._mem_per_iter)
+                    if psutil is not None
+                    else n_iter,
+                )
+                if trained and max_iter <= self.params[self.ITER_HP]:
                     return time.time() - start_time
-            self.params["n_estimators"] = max_iter
-        if self.params["n_estimators"] > 0:
+                self.params[self.ITER_HP] = max_iter
+        if self.params[self.ITER_HP] > 0:
+            if self.HAS_CALLBACK:
+                self._fit(
+                    X_train, y_train, callbacks=self._callbacks(start_time, deadline), **kwargs
+                )
+                best_iteration = (
+                    self._model.get_booster().best_iteration
+                    if isinstance(self, XGBoostSklearnEstimator)
+                    else self._model.best_iteration_
+                )
+                if best_iteration is not None:
+                    self._model.set_params(n_estimators=best_iteration + 1)
+            else:
                 self._fit(X_train, y_train, **kwargs)
         else:
-            self.params["n_estimators"] = self._model.n_estimators
+            self.params[self.ITER_HP] = self._model.n_estimators
         train_time = time.time() - start_time
         return train_time

+    def _callbacks(self, start_time, deadline) -> List[Callable]:
+        return [partial(self._callback, start_time, deadline)]
+
+    def _callback(self, start_time, deadline, env) -> None:
+        from lightgbm.callback import EarlyStopException
+
+        now = time.time()
+        if env.iteration == 0:
+            self._time_per_iter = now - start_time
+        if now + self._time_per_iter > deadline:
+            raise EarlyStopException(env.iteration, env.evaluation_result_list)
+        if psutil is not None:
+            mem = psutil.virtual_memory()
+            if mem.available / mem.total < FREE_MEM_RATIO:
+                raise EarlyStopException(env.iteration, env.evaluation_result_list)


 class XGBoostEstimator(SKLearnEstimator):
     """not using sklearn API, used for regression"""
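A standalone sketch of the LightGBM callback pattern used above: raising `EarlyStopException` from a callback stops boosting, here when the next iteration would cross a deadline. The data, deadline, and helper name are illustrative.

```python
import time
import numpy as np
import lightgbm as lgb
from lightgbm.callback import EarlyStopException

def deadline_callback(start_time, deadline):
    def _callback(env):
        now = time.time()
        if env.iteration == 0:
            # Estimate per-iteration cost from the first boosting round.
            _callback.time_per_iter = now - start_time
        if now + getattr(_callback, "time_per_iter", 0) > deadline:
            raise EarlyStopException(env.iteration, env.evaluation_result_list)
    return _callback

X = np.random.rand(1000, 10)
y = np.random.randint(2, size=1000)
start = time.time()
model = lgb.LGBMClassifier(n_estimators=10000)
model.fit(X, y, callbacks=[deadline_callback(start, start + 2)])
```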
@@ -439,6 +580,7 @@ class XGBoostEstimator(SKLearnEstimator):
         import xgboost as xgb

         start_time = time.time()
+        deadline = start_time + budget if budget else np.inf
         if issparse(X_train):
             self.params["tree_method"] = "auto"
         else:
@@ -456,9 +598,20 @@ class XGBoostEstimator(SKLearnEstimator):
         if "objective" in self.params:
             del self.params["objective"]
         _n_estimators = self.params.pop("n_estimators")
-        self._model = xgb.train(self.params, dtrain, _n_estimators, obj=obj)
-        self.params["objective"] = objective
+        callbacks = XGBoostEstimator._callbacks(start_time, deadline)
+        if callbacks:
+            self._model = xgb.train(
+                self.params,
+                dtrain,
+                _n_estimators,
+                obj=obj,
+                callbacks=callbacks,
+            )
+            self.params["n_estimators"] = self._model.best_iteration + 1
+        else:
+            self._model = xgb.train(self.params, dtrain, _n_estimators, obj=obj)
+            self.params["n_estimators"] = _n_estimators
+        self.params["objective"] = objective
         del dtrain
         train_time = time.time() - start_time
         return train_time
@@ -471,6 +624,28 @@ class XGBoostEstimator(SKLearnEstimator):
         dtest = xgb.DMatrix(X_test)
         return super().predict(dtest)

+    @classmethod
+    def _callbacks(cls, start_time, deadline):
+        try:
+            from xgboost.callback import TrainingCallback
+        except ImportError:  # for xgboost<1.3
+            return None
+
+        class ResourceLimit(TrainingCallback):
+            def after_iteration(self, model, epoch, evals_log) -> bool:
+                now = time.time()
+                if epoch == 0:
+                    self._time_per_iter = now - start_time
+                if now + self._time_per_iter > deadline:
+                    return True
+                if psutil is not None:
+                    mem = psutil.virtual_memory()
+                    if mem.available / mem.total < FREE_MEM_RATIO:
+                        return True
+                return False
+
+        return [ResourceLimit()]


 class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
     """using sklearn API, used for classification"""
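A standalone sketch of the `TrainingCallback` protocol relied on above (available in xgboost>=1.3): returning `True` from `after_iteration` stops boosting. Data and the deadline value are illustrative.

```python
import time
import numpy as np
import xgboost as xgb
from xgboost.callback import TrainingCallback

class DeadlineCallback(TrainingCallback):
    def __init__(self, deadline):
        self.deadline = deadline

    def after_iteration(self, model, epoch, evals_log) -> bool:
        # True -> stop training.
        return time.time() > self.deadline

X = np.random.rand(1000, 10)
y = np.random.randint(2, size=1000)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train(
    {"objective": "binary:logistic"},
    dtrain,
    num_boost_round=10000,
    callbacks=[DeadlineCallback(time.time() + 2)],
)
```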
@@ -513,8 +688,13 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
             self.params["tree_method"] = "auto"
         return super().fit(X_train, y_train, budget, **kwargs)

+    def _callbacks(self, start_time, deadline) -> List[Callable]:
+        return XGBoostEstimator._callbacks(start_time, deadline)
+
+
 class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
+    HAS_CALLBACK = False
+
     @classmethod
     def search_space(cls, data_size, task, **params):
         data_size = int(data_size)
@@ -607,6 +787,8 @@ class LRL1Classifier(SKLearnEstimator):


 class LRL2Classifier(SKLearnEstimator):
+    limit_resource = True
+
     @classmethod
     def search_space(cls, **params):
         return LRL1Classifier.search_space(**params)
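Per the commit notes, `lrl2` opts into `limit_resource` while `lrl1` cannot be limited that way. A short sketch mirroring the sparse-matrix test added in this commit; sizes and budget are illustrative.

```python
import numpy as np
import scipy.sparse
from flaml import AutoML

automl = AutoML()
X_train = scipy.sparse.random(3000, 3000, density=0.1)
y_train = np.random.randint(2, size=3000)
automl.fit(
    X_train=X_train,
    y_train=y_train,
    time_budget=3,
    metric="f1",
    task="classification",
    estimator_list=["lrl1", "lrl2"],
    n_jobs=1,
)
```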
@@ -629,8 +811,7 @@ class LRL2Classifier(SKLearnEstimator):


 class CatBoostEstimator(BaseEstimator):
-    _time_per_iter = None
-    _train_size = 0
+    ITER_HP = "n_estimators"

     @classmethod
     def search_space(cls, data_size, **params):
|
@@ -661,11 +842,6 @@ class CatBoostEstimator(BaseEstimator):
     def cost_relative2lgbm(cls):
         return 15

-    @classmethod
-    def init(cls):
-        CatBoostEstimator._time_per_iter = None
-        CatBoostEstimator._train_size = 0
-
     def _preprocess(self, X):
         if isinstance(X, pd.DataFrame):
             cat_columns = X.select_dtypes(include=["category"]).columns
@ -719,76 +895,13 @@ class CatBoostEstimator(BaseEstimator):
|
|||
import shutil
|
||||
|
||||
start_time = time.time()
|
||||
deadline = start_time + budget if budget else np.inf
|
||||
train_dir = f"catboost_{str(start_time)}"
|
||||
n_iter = self.params["n_estimators"]
|
||||
X_train = self._preprocess(X_train)
|
||||
if isinstance(X_train, pd.DataFrame):
|
||||
cat_features = list(X_train.select_dtypes(include="category").columns)
|
||||
else:
|
||||
cat_features = []
|
||||
# from catboost import CatBoostError
|
||||
# try:
|
||||
trained = False
|
||||
if (
|
||||
(
|
||||
not CatBoostEstimator._time_per_iter
|
||||
or abs(CatBoostEstimator._train_size - len(y_train)) > 4
|
||||
)
|
||||
and budget
|
||||
and n_iter > 4
|
||||
):
|
||||
# measure the time per iteration
|
||||
self.params["n_estimators"] = 1
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(
|
||||
train_dir=train_dir, **self.params
|
||||
)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs
|
||||
)
|
||||
CatBoostEstimator._t1 = time.time() - start_time
|
||||
if CatBoostEstimator._t1 >= budget or n_iter == 1:
|
||||
# self.params["n_estimators"] = n_iter
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
shutil.rmtree(train_dir, ignore_errors=True)
|
||||
return CatBoostEstimator._t1
|
||||
self.params["n_estimators"] = min(n_iter, 4)
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(
|
||||
train_dir=train_dir, **self.params
|
||||
)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs
|
||||
)
|
||||
CatBoostEstimator._time_per_iter = (
|
||||
time.time() - start_time - CatBoostEstimator._t1
|
||||
) / (self.params["n_estimators"] - 1)
|
||||
if CatBoostEstimator._time_per_iter <= 0:
|
||||
CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
|
||||
CatBoostEstimator._train_size = len(y_train)
|
||||
if (
|
||||
time.time() - start_time >= budget
|
||||
or n_iter == self.params["n_estimators"]
|
||||
):
|
||||
# self.params["n_estimators"] = n_iter
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
shutil.rmtree(train_dir, ignore_errors=True)
|
||||
return time.time() - start_time
|
||||
trained = True
|
||||
if budget and n_iter > 4:
|
||||
train_times = 1
|
||||
max_iter = min(
|
||||
n_iter,
|
||||
int(
|
||||
(budget - time.time() + start_time - CatBoostEstimator._t1)
|
||||
/ train_times
|
||||
/ CatBoostEstimator._time_per_iter
|
||||
+ 1
|
||||
),
|
||||
)
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
if trained and max_iter <= self.params["n_estimators"]:
|
||||
return time.time() - start_time
|
||||
self.params["n_estimators"] = max_iter
|
||||
if self.params["n_estimators"] > 0:
|
||||
n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
X_tr, y_tr = X_train[:n], y_train[:n]
|
||||
if "sample_weight" in kwargs:
|
||||
|
@@ -797,9 +910,21 @@ class CatBoostEstimator(BaseEstimator):
                     kwargs["sample_weight"] = weight[:n]
                 else:
                     weight = None
-                from catboost import Pool
+                from catboost import Pool, __version__

                 model = self.estimator_class(train_dir=train_dir, **self.params)
+                if __version__ >= "0.26":
+                    model.fit(
+                        X_tr,
+                        y_tr,
+                        cat_features=cat_features,
+                        eval_set=Pool(
+                            data=X_train[n:], label=y_train[n:], cat_features=cat_features
+                        ),
+                        callbacks=CatBoostEstimator._callbacks(start_time, deadline),
+                        **kwargs,
+                    )
+                else:
                     model.fit(
                         X_tr,
                         y_tr,
@@ -808,18 +933,32 @@ class CatBoostEstimator(BaseEstimator):
                             data=X_train[n:], label=y_train[n:], cat_features=cat_features
                         ),
                         **kwargs,
-                )  # model.get_best_iteration()
+                    )
                 shutil.rmtree(train_dir, ignore_errors=True)
                 if weight is not None:
                     kwargs["sample_weight"] = weight
                 self._model = model
         else:
-            self.params["n_estimators"] = self._model.tree_count_
-        # except CatBoostError:
-        #     self._model = None
+            self.params[self.ITER_HP] = self._model.tree_count_
         train_time = time.time() - start_time
         return train_time

+    @classmethod
+    def _callbacks(cls, start_time, deadline):
+        class ResourceLimit:
+            def after_iteration(self, info) -> bool:
+                now = time.time()
+                if info.iteration == 1:
+                    self._time_per_iter = now - start_time
+                if now + self._time_per_iter > deadline:
+                    return False
+                if psutil is not None:
+                    mem = psutil.virtual_memory()
+                    if mem.available / mem.total < FREE_MEM_RATIO:
+                        return False
+                return True  # can continue


 class KNeighborsEstimator(BaseEstimator):
     @classmethod
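A standalone sketch of the callback protocol CatBoost (>=0.26) expects, as used above: returning `False` from `after_iteration` stops training. Data, model settings, and the deadline are illustrative.

```python
import time
import numpy as np
from catboost import CatBoostClassifier

class DeadlineCallback:
    def __init__(self, deadline):
        self.deadline = deadline

    def after_iteration(self, info) -> bool:
        # False -> stop training; True -> continue.
        return time.time() <= self.deadline

X = np.random.rand(1000, 10)
y = np.random.randint(2, size=1000)
model = CatBoostClassifier(n_estimators=10000, verbose=False)
model.fit(X, y, callbacks=[DeadlineCallback(time.time() + 2)])
```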
@@ -919,6 +1058,7 @@ class Prophet(SKLearnEstimator):
         model = Prophet(**self.params)
         for regressor in cols:
             model.add_regressor(regressor)
+        with suppress_stdout_stderr():
             model.fit(train_df)
         train_time = time.time() - current_time
         self._model = model
@ -984,14 +1124,20 @@ class ARIMA(Prophet):
|
|||
regressors = cols
|
||||
if regressors:
|
||||
model = ARIMA_estimator(
|
||||
train_df[[TS_VALUE_COL]], exog=train_df[regressors], order=(
|
||||
self.params["p"], self.params["d"], self.params["q"]),
|
||||
enforce_stationarity=False, enforce_invertibility=False)
|
||||
train_df[[TS_VALUE_COL]],
|
||||
exog=train_df[regressors],
|
||||
order=(self.params["p"], self.params["d"], self.params["q"]),
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
else:
|
||||
model = ARIMA_estimator(
|
||||
train_df, order=(
|
||||
self.params["p"], self.params["d"], self.params["q"]),
|
||||
enforce_stationarity=False, enforce_invertibility=False)
|
||||
train_df,
|
||||
order=(self.params["p"], self.params["d"], self.params["q"]),
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
with suppress_stdout_stderr():
|
||||
model = model.fit()
|
||||
train_time = time.time() - current_time
|
||||
self._model = model
|
||||
|
@ -1010,7 +1156,9 @@ class ARIMA(Prophet):
|
|||
regressors = list(X_test)
|
||||
regressors.remove(TS_TIMESTAMP_COL)
|
||||
X_test = self._preprocess(X_test)
|
||||
forecast = self._model.predict(start=start, end=end, exog=X_test[regressors])
|
||||
forecast = self._model.predict(
|
||||
start=start, end=end, exog=X_test[regressors]
|
||||
)
|
||||
else:
|
||||
forecast = self._model.predict(start=start, end=end)
|
||||
else:
|
||||
|
@ -1077,25 +1225,64 @@ class SARIMAX(ARIMA):
|
|||
regressors.remove(TS_VALUE_COL)
|
||||
if regressors:
|
||||
model = SARIMAX_estimator(
|
||||
train_df[[TS_VALUE_COL]], exog=train_df[regressors], order=(
|
||||
self.params["p"], self.params["d"], self.params["q"]),
|
||||
train_df[[TS_VALUE_COL]],
|
||||
exog=train_df[regressors],
|
||||
order=(self.params["p"], self.params["d"], self.params["q"]),
|
||||
seasonality_order=(
|
||||
self.params["P"],
|
||||
self.params["D"],
|
||||
self.params["Q"],
|
||||
self.params["s"]),
|
||||
enforce_stationarity=False, enforce_invertibility=False)
|
||||
self.params["s"],
|
||||
),
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
else:
|
||||
model = SARIMAX_estimator(
|
||||
train_df, order=(
|
||||
self.params["p"], self.params["d"], self.params["q"]),
|
||||
train_df,
|
||||
order=(self.params["p"], self.params["d"], self.params["q"]),
|
||||
seasonality_order=(
|
||||
self.params["P"],
|
||||
self.params["D"],
|
||||
self.params["Q"],
|
||||
self.params["s"]),
|
||||
enforce_stationarity=False, enforce_invertibility=False)
|
||||
self.params["s"],
|
||||
),
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
with suppress_stdout_stderr():
|
||||
model = model.fit()
|
||||
train_time = time.time() - current_time
|
||||
self._model = model
|
||||
return train_time
|
||||
|
||||
|
||||
class suppress_stdout_stderr(object):
|
||||
"""
|
||||
A context manager for doing a "deep suppression" of stdout and stderr in
|
||||
Python, i.e. will suppress all print, even if the print originates in a
|
||||
compiled C/Fortran sub-function.
|
||||
This will not suppress raised exceptions, since exceptions are printed
|
||||
to stderr just before a script exits, and after the context manager has
|
||||
exited.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# Open a pair of null files
|
||||
self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
|
||||
# Save the actual stdout (1) and stderr (2) file descriptors.
|
||||
self.save_fds = (os.dup(1), os.dup(2))
|
||||
|
||||
def __enter__(self):
|
||||
# Assign the null pointers to stdout and stderr.
|
||||
os.dup2(self.null_fds[0], 1)
|
||||
os.dup2(self.null_fds[1], 2)
|
||||
|
||||
def __exit__(self, *_):
|
||||
# Re-assign the real stdout/stderr back to (1) and (2)
|
||||
os.dup2(self.save_fds[0], 1)
|
||||
os.dup2(self.save_fds[1], 2)
|
||||
# Close the null files
|
||||
os.close(self.null_fds[0])
|
||||
os.close(self.null_fds[1])
|
||||
|
|
(File diff suppressed because one or more lines are too long)

setup.py (10 changed lines)
@@ -38,16 +38,16 @@ setuptools.setup(
         "notebook": [
             "openml==0.10.2",
             "jupyter",
-            "matplotlib==3.2.0",
+            "matplotlib",
             "rgf-python",
+            "catboost>=0.26",
         ],
         "test": [
             "flake8>=3.8.4",
             "pytest>=6.1.1",
             "coverage>=5.3",
             "pre-commit",
-            "xgboost<1.3",
-            "catboost>=0.23",
+            "catboost>=0.26",
             "rgf-python",
             "optuna==2.8.0",
             "vowpalwabbit",
@@ -58,8 +58,9 @@ setuptools.setup(
             "datasets==1.4.1",
             "azure-storage-blob",
             "statsmodels>=0.12.2",
+            "psutil==5.8.0",
         ],
-        "catboost": ["catboost>=0.23"],
+        "catboost": ["catboost>=0.26"],
         "blendsearch": ["optuna==2.8.0"],
         "ray": [
             "ray[tune]==1.6.0",
@@ -83,6 +84,7 @@ setuptools.setup(
         ],
         "ts_forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
         "forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
+        "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"],
     },
     classifiers=[
         "Programming Language :: Python :: 3",
|
|
|
@ -0,0 +1,323 @@
|
|||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from flaml import AutoML
|
||||
from flaml.model import LGBMEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, **params):
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=32768),
|
||||
"init_value": 32768,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"num_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=32768),
|
||||
"init_value": 32768,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class TestClassification(unittest.TestCase):
|
||||
def test_preprocess(self):
|
||||
automl = AutoML()
|
||||
X = pd.DataFrame(
|
||||
{
|
||||
"f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
|
||||
"f2": [
|
||||
3.0,
|
||||
16.0,
|
||||
10.0,
|
||||
12.0,
|
||||
3.0,
|
||||
14.0,
|
||||
11.0,
|
||||
12.0,
|
||||
5.0,
|
||||
14.0,
|
||||
20.0,
|
||||
16.0,
|
||||
15.0,
|
||||
11.0,
|
||||
],
|
||||
"f3": [
|
||||
"a",
|
||||
"b",
|
||||
"a",
|
||||
"c",
|
||||
"c",
|
||||
"b",
|
||||
"b",
|
||||
"b",
|
||||
"b",
|
||||
"a",
|
||||
"b",
|
||||
1.0,
|
||||
1.0,
|
||||
"a",
|
||||
],
|
||||
"f4": [
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
False,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
],
|
||||
}
|
||||
)
|
||||
y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 6,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["catboost", "lrl2"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
# "verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["lrl2", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
"verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["xgboost", "catboost", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
# "verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["lgbm", "catboost", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
# "verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
def test_binary(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"task": "binary",
|
||||
"log_file_name": "test/breast_cancer.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_breast_cancer(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
_ = automl_experiment.predict(X_train)
|
||||
|
||||
def test_datetime_columns(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"log_file_name": "test/datetime_columns.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
fake_df = pd.DataFrame(
|
||||
{
|
||||
"A": [
|
||||
datetime(1900, 2, 3),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 7, 2),
|
||||
datetime(1900, 8, 9),
|
||||
],
|
||||
"B": [
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
],
|
||||
"year_A": [
|
||||
datetime(1900, 1, 2),
|
||||
datetime(1900, 8, 1),
|
||||
datetime(1900, 1, 4),
|
||||
datetime(1900, 6, 1),
|
||||
datetime(1900, 1, 5),
|
||||
datetime(1900, 4, 1),
|
||||
],
|
||||
}
|
||||
)
|
||||
y = np.array([0, 1, 0, 1, 0, 0])
|
||||
automl_experiment.fit(X_train=fake_df, y_train=y, **automl_settings)
|
||||
_ = automl_experiment.predict(fake_df)
|
||||
|
||||
def test_sparse_matrix_xgboost(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"metric": "ap",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"estimator_list": ["xgboost"],
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
}
|
||||
X_train = scipy.sparse.eye(900000)
|
||||
y_train = np.random.randint(2, size=900000)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def test_ray_classification(self):
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
X, y = make_classification(1000, 10)
|
||||
automl = AutoML()
|
||||
try:
|
||||
automl.fit(X, y, time_budget=10, task="classification", use_ray=True)
|
||||
automl.fit(
|
||||
X, y, time_budget=10, task="classification", n_concurrent_trials=2
|
||||
)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
def test_parallel_xgboost(self, hpo_method=None):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 10,
|
||||
"metric": "ap",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"estimator_list": ["xgboost"],
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
"n_concurrent_trials": 2,
|
||||
"hpo_method": hpo_method,
|
||||
}
|
||||
X_train = scipy.sparse.eye(900000)
|
||||
y_train = np.random.randint(2, size=900000)
|
||||
try:
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
def test_parallel_xgboost_others(self):
|
||||
# use random search as the hpo_method
|
||||
self.test_parallel_xgboost(hpo_method="random")
|
||||
|
||||
def test_random_skip_oom(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment.add_learner(
|
||||
learner_name="large_lgbm", learner_class=MyLargeLGBM
|
||||
)
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification_oom.log",
|
||||
"estimator_list": ["large_lgbm"],
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
"hpo_method": "random",
|
||||
"n_concurrent_trials": 2,
|
||||
}
|
||||
X_train = scipy.sparse.eye(900000)
|
||||
y_train = np.random.randint(2, size=900000)
|
||||
|
||||
try:
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
except ImportError:
|
||||
print("skipping concurrency test as ray is not installed")
|
||||
return
|
||||
|
||||
def test_sparse_matrix_lr(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"metric": "f1",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"estimator_list": ["lrl1", "lrl2"],
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
}
|
||||
X_train = scipy.sparse.random(3000, 3000, density=0.1)
|
||||
y_train = np.random.randint(2, size=3000)
|
||||
automl_experiment.fit(
|
||||
X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings
|
||||
)
|
||||
automl_settings["time_budget"] = 5
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -1,21 +1,12 @@
|
|||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import (
|
||||
fetch_california_housing,
|
||||
load_iris,
|
||||
load_wine,
|
||||
load_breast_cancer,
|
||||
)
|
||||
from sklearn.datasets import load_iris, load_wine
|
||||
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.data import CLASSIFICATION, get_output_from_log
|
||||
|
||||
from flaml.model import LGBMEstimator, SKLearnEstimator, XGBoostEstimator
|
||||
from flaml.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
|
||||
from flaml import tune
|
||||
from flaml.training_log import training_log_reader
|
||||
|
||||
|
@ -72,26 +63,21 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
|
|||
return 1.0
|
||||
|
||||
|
||||
def logregobj(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
|
||||
grad = preds - labels
|
||||
hess = preds * (1.0 - preds)
|
||||
return grad, hess
|
||||
|
||||
|
||||
class MyXGB1(XGBoostEstimator):
|
||||
"""XGBoostEstimator with logregobj as the objective function"""
|
||||
|
||||
def __init__(self, **config):
|
||||
super().__init__(objective=logregobj, **config)
|
||||
|
||||
|
||||
class MyXGB2(XGBoostEstimator):
|
||||
"""XGBoostEstimator with 'reg:squarederror' as the objective function"""
|
||||
|
||||
def __init__(self, **config):
|
||||
super().__init__(objective="reg:squarederror", **config)
|
||||
class MyLargeXGB(XGBoostSklearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, **params):
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=32768),
|
||||
"init_value": 32768,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"max_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=3276),
|
||||
"init_value": 3276,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
|
||||
|
@ -104,8 +90,8 @@ class MyLargeLGBM(LGBMEstimator):
|
|||
"low_cost_init_value": 4,
|
||||
},
|
||||
"num_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=32768),
|
||||
"init_value": 32768,
|
||||
"domain": tune.lograndint(lower=4, upper=3276),
|
||||
"init_value": 3276,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
}
|
||||
|
@ -141,7 +127,7 @@ def custom_metric(
|
|||
}
|
||||
|
||||
|
||||
class TestAutoML(unittest.TestCase):
|
||||
class TestMultiClass(unittest.TestCase):
|
||||
def test_custom_learner(self):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
|
||||
|
@ -185,123 +171,6 @@ class TestAutoML(unittest.TestCase):
|
|||
"""The main flaml automl API"""
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
def test_preprocess(self):
|
||||
automl = AutoML()
|
||||
X = pd.DataFrame(
|
||||
{
|
||||
"f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
|
||||
"f2": [
|
||||
3.0,
|
||||
16.0,
|
||||
10.0,
|
||||
12.0,
|
||||
3.0,
|
||||
14.0,
|
||||
11.0,
|
||||
12.0,
|
||||
5.0,
|
||||
14.0,
|
||||
20.0,
|
||||
16.0,
|
||||
15.0,
|
||||
11.0,
|
||||
],
|
||||
"f3": [
|
||||
"a",
|
||||
"b",
|
||||
"a",
|
||||
"c",
|
||||
"c",
|
||||
"b",
|
||||
"b",
|
||||
"b",
|
||||
"b",
|
||||
"a",
|
||||
"b",
|
||||
1.0,
|
||||
1.0,
|
||||
"a",
|
||||
],
|
||||
"f4": [
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
False,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
False,
|
||||
True,
|
||||
True,
|
||||
],
|
||||
}
|
||||
)
|
||||
y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 6,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["catboost", "lrl2"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
"verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["lrl2", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
"verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["xgboost", "catboost", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
"verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
"n_jobs": 1,
|
||||
"estimator_list": ["lgbm", "catboost", "kneighbor"],
|
||||
"eval_method": "cv",
|
||||
"n_splits": 3,
|
||||
"metric": "accuracy",
|
||||
"log_training_metric": True,
|
||||
"verbose": 4,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
def test_dataframe(self):
|
||||
self.test_classification(True)
|
||||
|
||||
|
@ -348,20 +217,6 @@ class TestAutoML(unittest.TestCase):
|
|||
)
|
||||
print(metric_history)
|
||||
|
||||
def test_binary(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"task": "binary",
|
||||
"log_file_name": "test/breast_cancer.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_breast_cancer(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
_ = automl_experiment.predict(X_train)
|
||||
|
||||
def test_classification(self, as_frame=False):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
|
@ -401,47 +256,6 @@ class TestAutoML(unittest.TestCase):
|
|||
print(automl_experiment.model)
|
||||
print(automl_experiment.predict_proba(X_train)[:5])
|
||||
|
||||
def test_datetime_columns(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"log_file_name": "test/datetime_columns.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
fake_df = pd.DataFrame(
|
||||
{
|
||||
"A": [
|
||||
datetime(1900, 2, 3),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 3, 4),
|
||||
datetime(1900, 7, 2),
|
||||
datetime(1900, 8, 9),
|
||||
],
|
||||
"B": [
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
datetime(1900, 1, 1),
|
||||
],
|
||||
"year_A": [
|
||||
datetime(1900, 1, 2),
|
||||
datetime(1900, 8, 1),
|
||||
datetime(1900, 1, 4),
|
||||
datetime(1900, 6, 1),
|
||||
datetime(1900, 1, 5),
|
||||
datetime(1900, 4, 1),
|
||||
],
|
||||
}
|
||||
)
|
||||
y = np.array([0, 1, 0, 1, 0, 0])
|
||||
automl_experiment.fit(X_train=fake_df, y_train=y, **automl_settings)
|
||||
_ = automl_experiment.predict(fake_df)
|
||||
|
||||
def test_micro_macro_f1(self):
|
||||
automl_experiment_micro = AutoML()
|
||||
automl_experiment_macro = AutoML()
|
||||
|
@ -501,50 +315,6 @@ class TestAutoML(unittest.TestCase):
|
|||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_regression(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "regression",
|
||||
"log_file_name": "test/california.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = fetch_california_housing(return_X_y=True)
|
||||
n = int(len(y_train) * 9 // 10)
|
||||
automl_experiment.fit(
|
||||
X_train=X_train[:n],
|
||||
y_train=y_train[:n],
|
||||
X_val=X_train[n:],
|
||||
y_val=y_train[n:],
|
||||
**automl_settings
|
||||
)
|
||||
assert automl_experiment._state.eval_method == "holdout"
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
print(get_output_from_log(automl_settings["log_file_name"], 1))
|
||||
automl_experiment.retrain_from_log(
|
||||
task="regression",
|
||||
log_file_name=automl_settings["log_file_name"],
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
train_full=True,
|
||||
time_budget=1,
|
||||
)
|
||||
automl_experiment.retrain_from_log(
|
||||
task="regression",
|
||||
log_file_name=automl_settings["log_file_name"],
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
train_full=True,
|
||||
time_budget=0,
|
||||
)
|
||||
|
||||
def test_sparse_matrix_classification(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
|
@ -567,236 +337,51 @@ class TestAutoML(unittest.TestCase):
|
|||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def test_sparse_matrix_regression(self):
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)

    def test_sparse_matrix_xgboost(self):
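        # xgboost on a large sparse identity matrix, optimizing average precision ("ap")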
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 3,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_parallel(self, hpo_method=None):
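        # n_concurrent_trials=10 needs a parallel backend; the test returns early on ImportError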
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.model_history)
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_parallel_classification(self):
        from sklearn.datasets import make_classification

        X, y = make_classification(1000, 10)
        automl = AutoML()
        try:
            automl.fit(
                X, y, time_budget=10, task="classification", n_concurrent_trials=2
            )
        except ImportError:
            return

    def test_parallel_xgboost(self, hpo_method=None):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 2,
            "hpo_method": hpo_method,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.model_history)
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_parallel_xgboost_others(self):
        # use random search as the hpo_method
        self.test_parallel_xgboost(hpo_method="random")

    def test_random_out_of_memory(self):
    def _test_memory_limit(self):
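        # deliberately oversized LGBM learner (MyLargeLGBM) to exercise the out-of-memory handling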
        automl_experiment = AutoML()
        automl_experiment.add_learner(
            learner_name="large_lgbm", learner_class=MyLargeLGBM
        )
        automl_settings = {
            "time_budget": 2,
            "metric": "ap",
            "time_budget": None,
            "task": "classification",
            "log_file_name": "test/sparse_classification_oom.log",
            "log_file_name": "test/classification_oom.log",
            "estimator_list": ["large_lgbm"],
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 2,
            "hpo_method": "random",
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=True)

        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.model_history)
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_sparse_matrix_lr(self):
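        # searches over the lrl1 and lrl2 learners on random sparse data with f1 as the metric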
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "f1",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["lrl1", "lrl2"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.random(3000, 900, density=0.1)
        y_train = np.random.randint(2, size=3000)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_sparse_matrix_regression_holdout(self):
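        # tiny 8-row dataset with explicit holdout evaluation, per-sample weights, and early_stop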
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
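        # registers the custom my_xgb1/my_xgb2 learners and restricts the search to them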
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
            X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)

    def test_time_limit(self):
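        # a 0.5-second budget with deliberately large estimators to check that the time limit is respected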
        automl_experiment = AutoML()
        automl_experiment.add_learner(
            learner_name="large_lgbm", learner_class=MyLargeLGBM
        )
        automl_experiment.add_learner(
            learner_name="large_xgb", learner_class=MyLargeXGB
        )
        automl_settings = {
            "time_budget": 0.5,
            "task": "classification",
            "log_file_name": "test/classification_timeout.log",
            "estimator_list": ["catboost"],
            "log_type": "all",
            "hpo_method": "random",
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.model.params)
        automl_settings["estimator_list"] = ["large_xgb"]
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.model)
        automl_settings["estimator_list"] = ["large_lgbm"]
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.model)

    def test_fit_w_starting_point(self, as_frame=True):
        automl_experiment = AutoML()
@@ -60,7 +60,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=60)
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
    for config in config_history:
        print(config)
    print(automl.prune_attr)


@@ -113,3 +113,9 @@ class TestLogging(unittest.TestCase):
        with open("automl.pkl", "wb") as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
        pred1 = automl.predict(X_train)
        with open("automl.pkl", "rb") as f:
            automl = pickle.load(f)
        pred2 = automl.predict(X_train)
        delta = pred1 - pred2
        assert max(delta) == 0 and min(delta) == 0


@@ -0,0 +1,221 @@
import unittest
import numpy as np
import scipy.sparse
from sklearn.datasets import (
    fetch_california_housing,
)

from flaml import AutoML
from flaml.data import get_output_from_log
from flaml.model import XGBoostEstimator


def logregobj(preds, dtrain):
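    # custom XGBoost objective: gradient and hessian of the logistic loss on raw margins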
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess


class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with logregobj as the objective function"""

    def __init__(self, **config):
        super().__init__(objective=logregobj, **config)


class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator with 'reg:squarederror' as the objective function"""

    def __init__(self, **config):
        super().__init__(objective="reg:squarederror", **config)


class TestRegression(unittest.TestCase):
    def test_regression(self):
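        # California housing regression with holdout evaluation, then retrain_from_log with different budgets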
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = int(len(y_train) * 9 // 10)
        automl_experiment.fit(
            X_train=X_train[:n],
            y_train=y_train[:n],
            X_val=X_train[n:],
            y_val=y_train[n:],
            **automl_settings
        )
        assert automl_experiment._state.eval_method == "holdout"
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(get_output_from_log(automl_settings["log_file_name"], 1))
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=1,
        )
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=0,
        )

    def test_sparse_matrix_classification(self):
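        # multi-class labels on random sparse integer features with a uniform split_type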
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "auto",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "split_type": "uniform",
            "n_jobs": 1,
            "model_history": True,
        }
        X_train = scipy.sparse.random(1554, 21, dtype=int)
        y_train = np.random.randint(3, size=1554)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_sparse_matrix_regression(self):
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)

    def test_parallel(self, hpo_method=None):
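        # parallel search with n_concurrent_trials=10; returns early on ImportError when no parallel backend is available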
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.model_history)
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_sparse_matrix_regression_holdout(self):
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
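        # custom XGBoost learners with a validation set; also prints best_config and best_config_train_time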
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)


if __name__ == "__main__":
    unittest.main()


@@ -30,6 +30,7 @@ class TestTrainingLog(unittest.TestCase):
            # "ensemble": True,
            "keep_search_state": True,
            "estimator_list": estimator_list,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        automl.fit(X_train=X_train, y_train=y_train, **automl_settings)