Integrate multivariate time series forecasting (#254)

* Integrate multivariate time series forecasting, now supports continuous and categorical variables
  - update data.py to transform time series data
  - update search space
  - update documentation to reflect changes
  - update test_forecast.py
  - rename 'forecast' task to 'ts_forecast' task
* update automl.py and test_forecast.py
* update forecast notebook
* update README.md and setup.py
* update ml.py and test_forecast.py
  - make "ds" and "y" constant variables
* replace constants with constant variables
* bump version to 0.7.0
* update setup.py
  - support 'forecast' and 'ts_forecast'
* update automl.py and data.py
  - support 'forecast' and 'ts_forecast' tasks
2021-10-30 12:48:57 -04:00 · 2021-10-30 12:48:57 -04:00 · 519bfc2a18
parent e0155c2339
commit 519bfc2a18
9 changed files with 482 additions and 201 deletions
--- a/README.md
+++ b/README.md
@ -127,7 +127,7 @@ print(automl.model)
 * Time series forecasting.

 ```python
-# pip install flaml[forecast]
+# pip install flaml[ts_forecast]
 import numpy as np
 from flaml import AutoML
 X_train = np.arange('2014-01', '2021-01', dtype='datetime64[M]')
@ -136,8 +136,8 @@ automl = AutoML()
 automl.fit(X_train=X_train[:72],  # a single column of timestamp
           y_train=y_train,  # value for each timestamp
           period=12,  # time horizon to forecast, e.g., 12 months
-           task='forecast', time_budget=15,  # time budget in seconds
-           log_file_name="test/forecast.log",
+           task='ts_forecast', time_budget=15,  # time budget in seconds
+           log_file_name="test/ts_forecast.log",
          )
 print(automl.predict(X_train[72:]))
 ```
--- a/flaml/automl.py
+++ b/flaml/automl.py
@ -36,7 +36,7 @@ from .config import (
    N_SPLITS,
    SAMPLE_MULTIPLY_FACTOR,
 )
-from .data import concat, CLASSIFICATION
+from .data import concat, CLASSIFICATION, TS_FORECAST, FORECAST
 from . import tune
 from .training_log import training_log_reader, training_log_writer

@ -428,10 +428,22 @@ class AutoML:

        Args:
            X_test: A numpy array of featurized instances, shape n * m,
-                or for 'forecasting' task:
-                    a pandas dataframe with one column of timestamp values
-                    or an integer n for the predict steps (only valid when
-                    the estimator is arima or sarimax).
+                or for 'ts_forecast' task:
+                    a pandas dataframe with the first column containing
+                    timestamp values (datetime type) or an integer n for
+                    the predict steps (only valid when the estimator is
+                    arima or sarimax). Other columns in the dataframe
+                    are assumed to be exogenous variables (categorical
+                    or numeric).
+
+                    .. code-block:: python
+
+                        multivariate_X_test = pd.DataFrame({
+                            'timeStamp': pd.date_range(start='1/1/2022', end='1/07/2022'),
+                            'categorical_col': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes'],
+                            'continuous_col': [105, 107, 120, 118, 110, 112, 115]
+                        })
+                        model.predict(multivariate_X_test)

        Returns:
            A array-like of shape n * 1 - - each element is a predicted
@ -472,14 +484,12 @@ class AutoML:
    def _preprocess(self, X):
        if isinstance(X, int):
            return X
-        if self._state.task == "forecast":
+        if self._state.task == TS_FORECAST:
            X = pd.DataFrame(X)
-            X = X.rename(columns={X.columns[0]: "ds"})
-        else:
-            if issparse(X):
-                X = X.tocsr()
-            if self._transformer:
-                X = self._transformer.transform(X)
+        if issparse(X):
+            X = X.tocsr()
+        if self._transformer:
+            X = self._transformer.transform(X, self._state.task)
        return X

    def _validate_data(
@ -493,23 +503,6 @@ class AutoML:
        groups_val=None,
        groups=None,
    ):
-        if self._state.task == "forecast":
-            if dataframe is not None and label is not None:
-                dataframe = dataframe.copy()
-                dataframe = dataframe.rename(columns={label[0]: "ds", label[1]: "y"})
-            elif dataframe is not None:
-                assert "ds" in dataframe and "y" in dataframe, (
-                    "For forecasting task, dataframe must have columns "
-                    '"ds" and "y" with the dates and values respectively.'
-                )
-            elif (X_train_all is not None) and (y_train_all is not None):
-                dataframe = pd.DataFrame(X_train_all)
-                dataframe = dataframe.rename(columns={dataframe.columns[0]: "ds"})
-                dataframe["y"] = pd.Series(y_train_all)
-                X_train_all = None
-                y_train_all = None
-            label = "y"
-
        if X_train_all is not None and y_train_all is not None:
            assert (
                isinstance(X_train_all, np.ndarray)
@ -525,6 +518,8 @@ class AutoML:
            assert (
                X_train_all.size != 0 and y_train_all.size != 0
            ), "Input data must not be empty."
+            if isinstance(X_train_all, np.ndarray) and len(X_train_all.shape) == 1:
+                X_train_all = np.reshape(X_train_all, (X_train_all.size, 1))
            if isinstance(y_train_all, np.ndarray):
                y_train_all = y_train_all.flatten()
            assert (
@ -532,6 +527,10 @@ class AutoML:
            ), "# rows in X_train must match length of y_train."
            self._df = isinstance(X_train_all, pd.DataFrame)
            self._nrow, self._ndim = X_train_all.shape
+            if self._state.task == TS_FORECAST:
+                X_train_all = pd.DataFrame(X_train_all)
+                assert X_train_all[X_train_all.columns[0]].dtype.name == 'datetime64[ns]', (
+                    f"For '{TS_FORECAST}' task, the first column must contain timestamp values.")
            X, y = X_train_all, y_train_all
        elif dataframe is not None and label is not None:
            assert isinstance(
@ -539,12 +538,15 @@ class AutoML:
            ), "dataframe must be a pandas DataFrame"
            assert label in dataframe.columns, "label must a column name in dataframe"
            self._df = True
+            if self._state.task == TS_FORECAST:
+                assert dataframe[dataframe.columns[0]].dtype.name == 'datetime64[ns]', (
+                    f"For '{TS_FORECAST}' task, the first column must contain timestamp values.")
            X = dataframe.drop(columns=label)
            self._nrow, self._ndim = X.shape
            y = dataframe[label]
        else:
            raise ValueError("either X_train+y_train or dataframe+label are required")
-        if issparse(X_train_all) or self._state.task == "forecast":
+        if issparse(X_train_all):
            self._transformer = self._label_transformer = False
            self._X_train_all, self._y_train_all = X, y
        else:
@ -578,11 +580,11 @@ class AutoML:
                X_val.shape[0] == y_val.shape[0]
            ), "# rows in X_val must match length of y_val."
            if self._transformer:
-                self._state.X_val = self._transformer.transform(X_val)
+                self._state.X_val = self._transformer.transform(X_val, self._state.task)
            else:
                self._state.X_val = X_val
            if self._label_transformer:
-                self._state.y_val = self._label_transformer.transform(y_val)
+                self._state.y_val = self._label_transformer.transform(y_val, self._state.task)
            else:
                self._state.y_val = y_val
        else:
@ -668,7 +670,7 @@ class AutoML:
        if X_val is None and eval_method == "holdout":
            # if eval_method = holdout, make holdout data
            if self._split_type == "time":
-                if self._state.task == "forecast":
+                if self._state.task == TS_FORECAST:
                    num_samples = X_train_all.shape[0]
                    period = self._state.fit_kwargs["period"]
                    assert (
@ -826,7 +828,7 @@ class AutoML:
            )
        elif self._split_type == "time":
            # logger.info("Using TimeSeriesSplit")
-            if self._state.task == "forecast":
+            if self._state.task == TS_FORECAST:
                period = self._state.fit_kwargs["period"]
                if period * (n_splits + 1) > y_train_all.size:
                    n_splits = int(y_train_all.size / period - 1)
@ -861,7 +863,7 @@ class AutoML:
            record_id: An integer of the record ID in the file,
                0 corresponds to the first trial
            task: A string of the task type,
-                'binary', 'multi', 'regression', 'forecast', 'rank'
+                'binary', 'multi', 'regression', 'ts_forecast', 'rank'

        Returns:
            An estimator object for the given configuration
@ -908,20 +910,24 @@ class AutoML:
        Args:
            log_file_name: A string of the log file name
            X_train: A numpy array of training data in shape n*m
+                For 'ts_forecast' task, the first column of X_train
+                must be the timestamp column (datetime type). Other
+                columns in the dataframe are assumed to be exogenous
+                variables (categorical or numeric).
            y_train: A numpy array of labels in shape n*1
            dataframe: A dataframe of training data including label column.
-                For 'forecast' task, dataframe must be specified and should
-                have two columns: timestamp and value.
-            label: A str of the label column name for 'classification' or
-                'regression' task, e.g., 'label';
-                or a tuple of strings for timestamp and value columns for
-                'forecasting' task, e.g., ('timestamp', 'value').
+                For 'ts_forecast' task, dataframe must be specified and should
+                have at least two columns: timestamp and label, where the first
+                column is the timestamp column (datetime type). Other columns
+                in the dataframe are assumed to be exogenous variables
+                (categorical or numeric).
+            label: A str of the label column name, e.g., 'label';
                Note: If X_train and y_train are provided,
                dataframe and label are ignored;
                If not, dataframe and label must be provided.
            time_budget: A float number of the time budget in seconds.
            task: A string of the task type, e.g.,
-                'classification', 'regression', 'forecast', 'rank'.
+                'classification', 'regression', 'ts_forecast', 'rank'.
            eval_method: A string of resampling strategy, one of
                ['auto', 'cv', 'holdout'].
            split_ratio: A float of the validation data percentage for holdout.
@ -931,7 +937,7 @@ class AutoML:
                    None, 'stratified', 'uniform', 'time', 'group']. None -> stratified.
                For regression tasks, valid choices are [None, 'uniform', 'time'].
                    None -> uniform.
-                For time series forecasting, must be None or 'time'.
+                For ts_forecast tasks, must be None or 'time'.
                For ranking task, must be None or 'group'.
            groups: None or array-like | Group labels (with matching length to
                y_train) or groups counts (with sum equal to length of y_train)
@ -951,7 +957,10 @@ class AutoML:
            **fit_kwargs: Other key word arguments to pass to fit() function of
                the searched learners, such as sample_weight.
        """
-        self._state.task = task
+        if task == FORECAST:
+            self._state.task = TS_FORECAST
+        else:
+            self._state.task = task
        self._state.fit_kwargs = fit_kwargs
        self._validate_data(X_train, y_train, dataframe, label, groups=groups)

@ -1037,12 +1046,12 @@ class AutoML:
        elif self._state.task == "regression":
            assert split_type in [None, "uniform", "time", "group"]
            self._split_type = split_type or "uniform"
-        elif self._state.task == "forecast":
+        elif self._state.task == TS_FORECAST:
            assert split_type in [None, "time"]
            self._split_type = "time"
            assert isinstance(
                self._state.fit_kwargs.get("period"), int
-            ), "missing a required integer 'period' for forecast."
+            ), f"missing a required integer 'period' for '{TS_FORECAST}' task."
        elif self._state.task == "rank":
            assert (
                self._state.groups is not None
@ -1298,16 +1307,16 @@ class AutoML:

        Args:
            X_train: A numpy array or a pandas dataframe of training data in
-                shape (n, m). For 'forecast' task, X_train should contain a
-                single column of timestamps.
+                shape (n, m). For 'ts_forecast' task, the first column of X_train
+                must be the timestamp column (datetime type). Other columns in
+                the dataframe are assumed to be exogenous variables (categorical or numeric).
            y_train: A numpy array or a pandas series of labels in shape (n, ).
            dataframe: A dataframe of training data including label column.
-                For 'forecast' task, dataframe must be specified and should
-                have two columns: timestamp and value.
-            label: A str of the label column name for 'classification' or
-                'regression' task, e.g., 'label';
-                or a tuple of strings for timestamp and value columns for
-                'forecasting' task, e.g., ('timestamp', 'value').
+                For 'ts_forecast' task, dataframe must be specified and must have
+                at least two columns, timestamp and label, where the first
+                column is the timestamp column (datetime type). Other columns in
+                the dataframe are assumed to be exogenous variables (categorical or numeric).
+            label: A str of the label column name, e.g., 'label';
                Note: If X_train and y_train are provided,
                dataframe and label are ignored;
                If not, dataframe and label must be provided.
@ -1330,7 +1339,7 @@ class AutoML:
                which returns a float number as the minimization objective,
                and a dictionary as the metrics to log.
            task: A string of the task type, e.g.,
-                'classification', 'regression', 'forecast', 'rank'.
+                'classification', 'regression', 'ts_forecast', 'rank'.
            n_jobs: An integer of the number of threads for training.
            log_file_name: A string of the log file name.
            estimator_list: A list of strings for estimator names, or 'auto'
@ -1386,7 +1395,7 @@ class AutoML:
                    None, 'stratified', 'uniform', 'time']. None -> stratified.
                For regression tasks, valid choices are [None, 'uniform', 'time'].
                    None -> uniform.
-                For time series forecasting, must be None or 'time'.
+                For ts_forecast tasks, must be None or 'time'.
                For ranking task, must be None or 'group'.
            hpo_method: str or None, default=None | The hyperparameter
                optimization method. By default, CFO is used for sequential
@ -1433,10 +1442,13 @@ class AutoML:
                size when sample=True.
            **fit_kwargs: Other key word arguments to pass to fit() function of
                the searched learners, such as sample_weight. Include period as
-                a key word argument for 'forecast' task.
+                a key word argument for 'ts_forecast' task.
        """
        self._state._start_time_flag = self._start_time_flag = time.time()
-        self._state.task = task
+        if task == FORECAST:
+            self._state.task = TS_FORECAST
+        else:
+            self._state.task = task
        self._state.log_training_metric = log_training_metric
        self._state.fit_kwargs = fit_kwargs
        self._state.weight_val = sample_weight_val
@ -1488,7 +1500,7 @@ class AutoML:
                metric = "roc_auc"
            elif "multi" in self._state.task:
                metric = "log_loss"
-            elif self._state.task == "forecast":
+            elif self._state.task == TS_FORECAST:
                metric = "mape"
            elif self._state.task == "rank":
                metric = "ndcg"
@ -1515,7 +1527,7 @@ class AutoML:
        logger.info(f"Minimizing error metric: {error_metric}")

        if "auto" == estimator_list:
-            if self._state.task == "forecast":
+            if self._state.task == TS_FORECAST:
                try:
                    import prophet

@ -2132,7 +2144,7 @@ class AutoML:
            elif self._retrain_final:
                # reset time budget for retraining
                self._state.time_from_start -= self._state.time_budget
-                if self._state.task == "forecast" or (
+                if self._state.task == TS_FORECAST or (
                    self._state.time_budget - self._state.time_from_start
                    > self._selected.est_retrain_time(self.data_size_full)
                    and self._selected.best_config_sample_size == self._state.data_size
--- a/flaml/data.py
+++ b/flaml/data.py
@ -12,6 +12,10 @@ from .training_log import training_log_reader
 from datetime import datetime

 CLASSIFICATION = ("binary", "multi", "classification")
+TS_FORECAST = "ts_forecast"
+TS_TIMESTAMP_COL = "ds"
+TS_VALUE_COL = "y"
+FORECAST = "forecast"


 def load_openml_dataset(
@ -212,6 +216,11 @@ class DataTransformer:
            n = X.shape[0]
            cat_columns, num_columns, datetime_columns = [], [], []
            drop = False
+            if task == TS_FORECAST:
+                X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL})
+                ds_col = X.pop(TS_TIMESTAMP_COL)
+                if isinstance(y, pd.Series):
+                    y = y.rename(TS_VALUE_COL)
            for column in X.columns:
                # sklearn\utils\validation.py needs int/float values
                if X[column].dtype.name in ("object", "category"):
@ -270,6 +279,8 @@ class DataTransformer:
                            X[column] = X[column].fillna(np.nan)
                            num_columns.append(column)
            X = X[cat_columns + num_columns]
+            if task == TS_FORECAST:
+                X.insert(0, TS_TIMESTAMP_COL, ds_col)
            if cat_columns:
                X[cat_columns] = X[cat_columns].astype("category")
            if num_columns:
@ -312,7 +323,7 @@ class DataTransformer:
            self.label_transformer = None
        return X, y

-    def transform(self, X):
+    def transform(self, X, task):
        X = X.copy()
        if isinstance(X, pd.DataFrame):
            cat_columns, num_columns, datetime_columns = (
@ -320,6 +331,9 @@ class DataTransformer:
                self._num_columns,
                self._datetime_columns,
            )
+            if task == TS_FORECAST:
+                X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL})
+                ds_col = X.pop(TS_TIMESTAMP_COL)
            if datetime_columns:
                for column in datetime_columns:
                    tmp_dt = X[column].dt
@ -344,6 +358,8 @@ class DataTransformer:
                    X[column] = X[column].map(datetime.toordinal)
                    del tmp_dt
            X = X[cat_columns + num_columns].copy()
+            if task == TS_FORECAST:
+                X.insert(0, TS_TIMESTAMP_COL, ds_col)
            for column in cat_columns:
                if X[column].dtype.name == "object":
                    X[column] = X[column].fillna("__NAN__")
--- a/flaml/ml.py
+++ b/flaml/ml.py
@ -33,7 +33,7 @@ from .model import (
    ARIMA,
    SARIMAX,
 )
-from .data import CLASSIFICATION, group_counts
+from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL

 import logging

@ -313,8 +313,8 @@ def evaluate_model_CV(
        groups = kf.groups
        kf = kf.split(X_train_split, y_train_split, groups)
        shuffle = False
-    elif isinstance(kf, TimeSeriesSplit) and task == "forecast":
-        y_train_all = pd.DataFrame(y_train_all, columns=["y"])
+    elif isinstance(kf, TimeSeriesSplit) and task == TS_FORECAST:
+        y_train_all = pd.DataFrame(y_train_all, columns=[TS_VALUE_COL])
        train = X_train_all.join(y_train_all)
        kf = kf.split(train)
        shuffle = False
--- a/flaml/model.py
+++ b/flaml/model.py
@ -11,7 +11,7 @@ from sklearn.linear_model import LogisticRegression
 from scipy.sparse import issparse
 import pandas as pd
 from . import tune
-from .data import group_counts, CLASSIFICATION
+from .data import group_counts, CLASSIFICATION, TS_FORECAST, TS_TIMESTAMP_COL, TS_VALUE_COL

 import logging

@ -871,22 +871,22 @@ class KNeighborsEstimator(BaseEstimator):
        return X


-class Prophet(BaseEstimator):
+class Prophet(SKLearnEstimator):
    @classmethod
    def search_space(cls, **params):
        space = {
            "changepoint_prior_scale": {
-                "domain": tune.loguniform(lower=0.001, upper=1000),
-                "init_value": 0.01,
+                "domain": tune.loguniform(lower=0.001, upper=0.05),
+                "init_value": 0.05,
                "low_cost_init_value": 0.001,
            },
            "seasonality_prior_scale": {
-                "domain": tune.loguniform(lower=0.01, upper=100),
-                "init_value": 1,
+                "domain": tune.loguniform(lower=0.01, upper=10),
+                "init_value": 10,
            },
            "holidays_prior_scale": {
-                "domain": tune.loguniform(lower=0.01, upper=100),
-                "init_value": 1,
+                "domain": tune.loguniform(lower=0.01, upper=10),
+                "init_value": 10,
            },
            "seasonality_mode": {
                "domain": tune.choice(["additive", "multiplicative"]),
@ -895,15 +895,15 @@ class Prophet(BaseEstimator):
        }
        return space

-    def __init__(self, task="forecast", n_jobs=1, **params):
+    def __init__(self, task=TS_FORECAST, n_jobs=1, **params):
        super().__init__(task, **params)

    def _join(self, X_train, y_train):
-        assert "ds" in X_train, (
-            "Dataframe for training forecast model must have column"
-            ' "ds" with the dates in X_train.'
+        assert TS_TIMESTAMP_COL in X_train, (
+            "Dataframe for training ts_forecast model must have column"
+            f' "{TS_TIMESTAMP_COL}" with the dates in X_train.'
        )
-        y_train = pd.DataFrame(y_train, columns=["y"])
+        y_train = pd.DataFrame(y_train, columns=[TS_VALUE_COL])
        train_df = X_train.join(y_train)
        return train_df

@ -912,7 +912,14 @@ class Prophet(BaseEstimator):

        current_time = time.time()
        train_df = self._join(X_train, y_train)
-        model = Prophet(**self.params).fit(train_df)
+        train_df = self._preprocess(train_df)
+        cols = list(train_df)
+        cols.remove(TS_TIMESTAMP_COL)
+        cols.remove(TS_VALUE_COL)
+        model = Prophet(**self.params)
+        for regressor in cols:
+            model.add_regressor(regressor)
+        model.fit(train_df)
        train_time = time.time() - current_time
        self._model = model
        return train_time
@ -921,9 +928,11 @@ class Prophet(BaseEstimator):
        if isinstance(X_test, int):
            raise ValueError(
                "predict() with steps is only supported for arima/sarimax."
-                " For Prophet, pass a dataframe with a date colum named ds."
+                " For Prophet, pass a dataframe with the first column containing"
+                " the timestamp values."
            )
        if self._model is not None:
+            X_test = self._preprocess(X_test)
            forecast = self._model.predict(X_test)
            return forecast["yhat"]
        else:
@ -949,7 +958,7 @@ class ARIMA(Prophet):
            },
            "q": {
                "domain": tune.quniform(lower=0, upper=10, q=1),
-                "init_value": 2,
+                "init_value": 1,
                "low_cost_init_value": 0,
            },
        }
@ -957,8 +966,8 @@ class ARIMA(Prophet):

    def _join(self, X_train, y_train):
        train_df = super()._join(X_train, y_train)
-        train_df.index = pd.to_datetime(train_df["ds"])
-        train_df = train_df.drop("ds", axis=1)
+        train_df.index = pd.to_datetime(train_df[TS_TIMESTAMP_COL])
+        train_df = train_df.drop(TS_TIMESTAMP_COL, axis=1)
        return train_df

    def fit(self, X_train, y_train, budget=None, **kwargs):
@ -969,12 +978,20 @@ class ARIMA(Prophet):

        current_time = time.time()
        train_df = self._join(X_train, y_train)
-        model = ARIMA_estimator(
-            train_df,
-            order=(self.params["p"], self.params["d"], self.params["q"]),
-            enforce_stationarity=False,
-            enforce_invertibility=False,
-        )
+        train_df = self._preprocess(train_df)
+        cols = list(train_df)
+        cols.remove(TS_VALUE_COL)
+        regressors = cols
+        if regressors:
+            model = ARIMA_estimator(
+                train_df[[TS_VALUE_COL]], exog=train_df[regressors], order=(
+                    self.params["p"], self.params["d"], self.params["q"]),
+                enforce_stationarity=False, enforce_invertibility=False)
+        else:
+            model = ARIMA_estimator(
+                train_df, order=(
+                    self.params["p"], self.params["d"], self.params["q"]),
+                enforce_stationarity=False, enforce_invertibility=False)
        model = model.fit()
        train_time = time.time() - current_time
        self._model = model
@ -985,12 +1002,20 @@ class ARIMA(Prophet):
            if isinstance(X_test, int):
                forecast = self._model.forecast(steps=X_test)
            elif isinstance(X_test, pd.DataFrame):
+                first_col = X_test.pop(TS_TIMESTAMP_COL)
+                X_test.insert(0, TS_TIMESTAMP_COL, first_col)
                start = X_test.iloc[0, 0]
                end = X_test.iloc[-1, 0]
-                forecast = self._model.predict(start=start, end=end)
+                if len(X_test.columns) > 1:
+                    regressors = list(X_test)
+                    regressors.remove(TS_TIMESTAMP_COL)
+                    X_test = self._preprocess(X_test)
+                    forecast = self._model.predict(start=start, end=end, exog=X_test[regressors])
+                else:
+                    forecast = self._model.predict(start=start, end=end)
            else:
                raise ValueError(
-                    "X_test needs to be either a pd.Dataframe with dates as column ds)"
+                    "X_test needs to be either a pd.Dataframe with dates as the first column"
                    " or an int number of periods for predict()."
                )
            return forecast
@ -1014,7 +1039,7 @@ class SARIMAX(ARIMA):
            },
            "q": {
                "domain": tune.quniform(lower=0, upper=10, q=1),
-                "init_value": 2,
+                "init_value": 1,
                "low_cost_init_value": 0,
            },
            "P": {
@ -1040,22 +1065,36 @@ class SARIMAX(ARIMA):
        return space

    def fit(self, X_train, y_train, budget=None, **kwargs):
+        import warnings
+
+        warnings.filterwarnings("ignore")
        from statsmodels.tsa.statespace.sarimax import SARIMAX as SARIMAX_estimator

        current_time = time.time()
        train_df = self._join(X_train, y_train)
-        model = SARIMAX_estimator(
-            train_df,
-            order=(self.params["p"], self.params["d"], self.params["q"]),
-            seasonality_order=(
-                self.params["P"],
-                self.params["D"],
-                self.params["Q"],
-                self.params["s"],
-            ),
-            enforce_stationarity=False,
-            enforce_invertibility=False,
-        )
+        train_df = self._preprocess(train_df)
+        regressors = list(train_df)
+        regressors.remove(TS_VALUE_COL)
+        if regressors:
+            model = SARIMAX_estimator(
+                train_df[[TS_VALUE_COL]], exog=train_df[regressors], order=(
+                    self.params["p"], self.params["d"], self.params["q"]),
+                seasonality_order=(
+                    self.params["P"],
+                    self.params["D"],
+                    self.params["Q"],
+                    self.params["s"]),
+                enforce_stationarity=False, enforce_invertibility=False)
+        else:
+            model = SARIMAX_estimator(
+                train_df, order=(
+                    self.params["p"], self.params["d"], self.params["q"]),
+                seasonality_order=(
+                    self.params["P"],
+                    self.params["D"],
+                    self.params["Q"],
+                    self.params["s"]),
+                enforce_stationarity=False, enforce_invertibility=False)
        model = model.fit()
        train_time = time.time() - current_time
        self._model = model
--- a/flaml/version.py
+++ b/flaml/version.py
@ -1 +1 @@
-__version__ = "0.6.9"
+__version__ = "0.7.0"
--- a/notebook/flaml_forecast.ipynb
+++ b/notebook/flaml_forecast.ipynb
@ -2,13 +2,14 @@
 "cells": [
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "# Time Series Forecasting with FLAML Library"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## 1. Introduction\n",
    "\n",
@ -20,32 +21,33 @@
    " - In this notebook, we demonstrate how to use FLAML library to tune hyperparameters of XGBoost with a regression example.\n",
    "\n",
    "FLAML requires Python>=3.6. To run this notebook example, please install flaml with the notebook and forecast option:\n"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "source": [
-    "!pip install flaml[notebook,forecast]"
-   ],
+   "metadata": {},
   "outputs": [],
-   "metadata": {}
+   "source": [
+    "!pip install flaml[notebook,ts_forecast]"
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
-    "## 2. Forecast Problem\r\n",
-    "\r\n",
-    "### Load data and preprocess\r\n",
-    "\r\n",
+    "## 2. Forecast Problem\n",
+    "\n",
+    "### Load data and preprocess\n",
+    "\n",
    "Import co2 data from statsmodel. The dataset is from “Atmospheric CO2 from Continuous Air Samples at Mauna Loa Observatory, Hawaii, U.S.A.,” which collected CO2 samples from March 1958 to December 2001. The task is to predict monthly CO2 samples."
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import statsmodels.api as sm\n",
    "data = sm.datasets.co2.load_pandas()\n",
@ -55,149 +57,149 @@
    "data = data.fillna(data.bfill())  # makes sure there are no missing values\n",
    "data = data.to_frame().reset_index()\n",
    "# data = data.rename(columns={'index': 'ds', 'co2': 'y'})"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# split the data into a train dataframe and X_test and y_test dataframes, where the number of samples for test is equal to\n",
    "# the number of periods the user wants to predict\n",
    "num_samples = data.shape[0]\n",
    "time_horizon = 12\n",
    "split_idx = num_samples - time_horizon\n",
-    "X_train = data[:split_idx]  # X_train is a dataframe with two columns: time and value\n",
-    "X_test = data[split_idx:]['index'].to_frame('ds')  # X_test is a dataframe with dates for prediction\n",
+    "train_df = data[:split_idx]  # train_df is a dataframe with two columns: timestamp and label\n",
+    "X_test = data[split_idx:]['index'].to_frame()  # X_test is a dataframe with dates for prediction\n",
    "y_test = data[split_idx:]['co2']  # y_test is a series of the values corresponding to the dates for prediction"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
-    "### Run FLAML\r\n",
-    "\r\n",
+    "### Run FLAML\n",
+    "\n",
    "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them."
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "''' import AutoML class from flaml package '''\n",
    "from flaml import AutoML\n",
    "automl = AutoML()"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "settings = {\n",
    "    \"time_budget\": 180,  # total running time in seconds\n",
    "    \"metric\": 'mape',  # primary metric for validation: 'mape' is generally used for forecast tasks\n",
-    "    \"task\": 'forecast',  # task type\n",
+    "    \"task\": 'ts_forecast',  # task type\n",
    "    \"log_file_name\": 'CO2_forecast.log',  # flaml log file\n",
    "    \"eval_method\": \"holdout\",  # validation method can be chosen from ['auto', 'holdout', 'cv']\n",
    "    \"seed\": 7654321,  # random seed\n",
    "}"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "'''The main flaml automl API'''\n",
-    "automl.fit(dataframe=X_train,  # training data\n",
-    "           label=('index', 'co2'),  # For 'forecast' task, label should be a tuple of strings for timestamp and value columns\n",
+    "automl.fit(dataframe=train_df,  # training data\n",
+    "           label='co2',  # For 'ts_forecast' task, label is the name of the column holding the values to forecast\n",
    "           **settings, \n",
    "           period=time_horizon)  # key word argument 'period' must be included for forecast task)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "### Best model and metric"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "''' retrieve best config and best learner'''\n",
    "print('Best ML leaner:', automl.best_estimator)\n",
    "print('Best hyperparmeter config:', automl.best_config)\n",
    "print(f'Best mape on validation data: {automl.best_loss}')\n",
    "print(f'Training duration of best run: {automl.best_config_train_time}s')"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "print(automl.model.estimator)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "''' pickle and save the automl object '''\n",
    "import pickle\n",
    "with open('automl.pkl', 'wb') as f:\n",
    "    pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "''' compute predictions of testing dataset '''\n",
    "flaml_y_pred = automl.predict(X_test)\n",
    "print('Predicted labels', flaml_y_pred)\n",
    "print('True labels', y_test)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "''' compute different metric values on testing dataset'''\n",
    "from flaml.ml import sklearn_metric_loss_score\n",
-    "print('mape', '=', sklearn_metric_loss_score('mape', flaml_y_pred, y_test))"
-   ],
-   "outputs": [],
-   "metadata": {}
+    "print('mape', '=', sklearn_metric_loss_score('mape', y_predict=flaml_y_pred, y_true=y_test))"
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "### Log history"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "from flaml.data import get_output_from_log\n",
    "time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \\\n",
@ -205,13 +207,13 @@
    "\n",
    "for config in config_history:\n",
    "    print(config)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
@ -222,13 +224,13 @@
    "plt.scatter(time_history, 1 - np.array(valid_loss_history))\n",
    "plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n",
    "plt.show()"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "plt.plot(X_test, y_test, label='Actual level')\n",
@ -236,32 +238,30 @@
    "plt.xlabel('Date')\n",
    "plt.ylabel('CO2 Levels')\n",
    "plt.legend()"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  }
 ],
 "metadata": {
+  "interpreter": {
+   "hash": "8b6c8c3ba4bafbc4530f534c605c8412f25bf61ef13254e4f377ccd42b838aa4"
+  },
  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.8.0 64-bit ('blend': conda)"
+   "display_name": "Python 3.8.10 64-bit ('python38': conda)",
+   "name": "python3"
  },
  "language_info": {
-   "name": "python",
-   "version": "3.8.0",
-   "mimetype": "text/x-python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
-   "pygments_lexer": "ipython3",
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
   "nbconvert_exporter": "python",
-   "file_extension": ".py"
-  },
-  "interpreter": {
-   "hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544"
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}
--- a/setup.py
+++ b/setup.py
@ -81,6 +81,7 @@ setuptools.setup(
            "tensorboardX<=2.2",
            "torch",
        ],
+        "ts_forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
        "forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
    },
    classifiers=[
--- a/test/test_forecast.py
+++ b/test/test_forecast.py
@ -23,10 +23,10 @@ def test_forecast_automl(budget=5):
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
-        "task": "forecast",  # task type
+        "task": "ts_forecast",  # task type
        "log_file_name": "test/CO2_forecast.log",  # flaml log file
        "eval_method": "holdout",
-        "label": ("ds", "y"),
+        "label": "y",
    }
    """The main flaml automl API"""
    try:
@ -75,7 +75,7 @@ def test_forecast_automl(budget=5):
    print(automl.max_resource)
    print(automl.min_resource)

-    X_train = df["ds"]
+    X_train = df[["ds"]]
    y_train = df["y"]
    automl = AutoML()
    try:
@ -93,39 +93,252 @@ def test_forecast_automl(budget=5):

 def test_numpy():
    X_train = np.arange("2014-01", "2021-01", dtype="datetime64[M]")
-    y_train = np.random.random(size=72)
+    y_train = np.random.random(size=len(X_train))
    automl = AutoML()
    try:
        import prophet

        automl.fit(
-            X_train=X_train[:60],  # a single column of timestamp
-            y_train=y_train,  # value for each timestamp
+            X_train=X_train[:72],  # a single column of timestamp
+            y_train=y_train[:72],  # value for each timestamp
            period=12,  # time horizon to forecast, e.g., 12 months
-            task="forecast",
+            task="ts_forecast",
            time_budget=3,  # time budget in seconds
-            log_file_name="test/forecast.log",
+            log_file_name="test/ts_forecast.log",
        )
-        print(automl.predict(X_train[60:]))
-        print(automl.predict(12))
-    except ValueError:
-        print("ValueError for prophet is raised as expected.")
+        print(automl.predict(X_train[72:]))
    except ImportError:
        print("not using prophet due to ImportError")
        automl = AutoML()
        automl.fit(
            X_train=X_train[:72],  # a single column of timestamp
-            y_train=y_train,  # value for each timestamp
+            y_train=y_train[:72],  # value for each timestamp
            period=12,  # time horizon to forecast, e.g., 12 months
-            task="forecast",
+            task="ts_forecast",
            time_budget=1,  # time budget in seconds
            estimator_list=["arima", "sarimax"],
-            log_file_name="test/forecast.log",
+            log_file_name="test/ts_forecast.log",
        )
        print(automl.predict(X_train[72:]))
        # an alternative way to specify predict steps for arima/sarimax
        print(automl.predict(12))


+def load_multi_dataset():
+    """Load the multivariate time series forecasting dataset.
+
+    Downloads the NYC energy consumption CSV, resamples it to daily means,
+    forward-fills gaps in the "temp" and "precip" columns and drops the
+    last two rows.
+
+    Returns:
+        A pandas DataFrame in which "timeStamp" is a regular column again
+        (restored by ``reset_index``) next to the daily-mean value columns;
+        the tests in this file use "demand", "temp" and "precip".
+    """
+    import pandas as pd
+
+    # pd.set_option("display.max_rows", None, "display.max_columns", None)
+    url = (
+        "https://raw.githubusercontent.com/srivatsan88/YouTubeLI/"
+        "master/dataset/nyc_energy_consumption.csv"
+    )
+    df = pd.read_csv(url)
+    # preprocessing data
+    df["timeStamp"] = pd.to_datetime(df["timeStamp"])
+    df = df.set_index("timeStamp").resample("D").mean()
+    # Series.ffill() replaces fillna(method="ffill"), which pandas 2.x deprecates
+    df["temp"] = df["temp"].ffill()
+    df["precip"] = df["precip"].ffill()
+    # last two rows are NaN for 'demand' column so remove them (positional slice)
+    df = df.iloc[:-2]
+    return df.reset_index()
+
+
+def test_multivariate_forecast_num(budget=5):
+    """Run a 'ts_forecast' AutoML fit with numerical regressors and print results.
+
+    The last 180 rows of the dataset are held out as the test horizon; the
+    remaining rows are used for training.
+
+    Args:
+        budget: time budget in seconds for the AutoML run; also passed to
+            ``get_output_from_log`` when the log is read back.
+    """
+    import pickle
+
+    from flaml.data import get_output_from_log
+    from flaml.ml import sklearn_metric_loss_score
+
+    df = load_multi_dataset()
+
+    # split data into train and test
+    time_horizon = 180
+    split_idx = df.shape[0] - time_horizon
+    train_df, test_df = df[:split_idx], df[split_idx:]
+    # test dataframe must contain values for the regressors / multivariate variables
+    X_test = test_df[["timeStamp", "temp", "precip"]]
+    y_test = test_df["demand"]
+
+    automl = AutoML()
+    settings = {
+        "time_budget": budget,  # total running time in seconds
+        "metric": "mape",  # primary metric
+        "task": "ts_forecast",  # task type
+        "log_file_name": "test/energy_forecast_numerical.log",  # flaml log file
+        "eval_method": "holdout",
+        "log_type": "all",
+        "label": "demand",
+    }
+    fit_kwargs = dict(dataframe=train_df, period=time_horizon, **settings)
+
+    # The main flaml automl API
+    try:
+        import prophet  # noqa: F401 -- imported only to detect whether prophet is installed
+
+        automl.fit(**fit_kwargs)
+    except ImportError:
+        print("not using prophet due to ImportError")
+        automl.fit(estimator_list=["arima", "sarimax"], **fit_kwargs)
+
+    # retrieve best config and best learner
+    print("Best ML leaner:", automl.best_estimator)
+    print("Best hyperparmeter config:", automl.best_config)
+    print(f"Best mape on validation data: {automl.best_loss}")
+    print(f"Training duration of best run: {automl.best_config_train_time}s")
+    print(automl.model.estimator)
+
+    # pickle and save the automl object
+    with open("automl.pkl", "wb") as f:
+        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
+
+    # compute predictions of testing dataset
+    y_pred = automl.predict(X_test)
+    print("Predicted labels", y_pred)
+    print("True labels", y_test)
+
+    # compute different metric values on testing dataset
+    print("mape", "=", sklearn_metric_loss_score("mape", y_predict=y_pred, y_true=y_test))
+
+    # only the config history is printed; the other four log outputs are unused
+    _, _, _, config_history, _ = get_output_from_log(
+        filename=settings["log_file_name"], time_budget=budget
+    )
+    for config in config_history:
+        print(config)
+    print(automl.prune_attr)
+    print(automl.max_resource)
+    print(automl.min_resource)
+
+    # Optional visualization, kept disabled so the test needs no display:
+    # import matplotlib.pyplot as plt
+    #
+    # plt.figure()
+    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
+    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
+    # plt.xlabel("Date")
+    # plt.ylabel("Energy Demand")
+    # plt.legend()
+    # plt.show()
+
+
+def load_multi_dataset_cat(time_horizon):
+    """Build train/test frames with categorical regressors from the energy data.
+
+    Two features are derived from "timeStamp" and "temp": "season" (one of
+    "winter", "spring", "summer", "fall") and "above_monthly_avg" (1 when the
+    row's temperature exceeds the mean temperature of its calendar month,
+    else 0).
+
+    Args:
+        time_horizon: number of trailing rows to hold out as the test set.
+
+    Returns:
+        A ``(train_df, test_df)`` tuple. ``train_df`` has the helper columns
+        "temp" and "month" removed; ``test_df`` keeps every column.
+    """
+    df = load_multi_dataset()
+
+    # Work on an explicit copy: the columns added below must not be written
+    # into a slice of the loaded frame (avoids chained-assignment problems).
+    df = df[["timeStamp", "demand", "temp"]].copy()
+
+    # feature engineering - use discrete values to denote different categories
+    def season(date):
+        """Map a timestamp to its season name using fixed (month, day) boundaries."""
+        date = (date.month, date.day)
+        spring = (3, 20)
+        summer = (6, 21)
+        fall = (9, 22)
+        winter = (12, 21)
+        if spring <= date < summer:
+            return "spring"
+        if summer <= date < fall:
+            return "summer"
+        if fall <= date < winter:
+            return "fall"
+        # everything before the spring boundary or on/after the winter boundary
+        return "winter"
+
+    df["month"] = df["timeStamp"].dt.month
+    monthly_avg = df.groupby("month")["temp"].mean().to_dict()
+
+    df["season"] = df["timeStamp"].apply(season)
+    # comparisons against NaN are False, so missing temperatures yield 0
+    df["above_monthly_avg"] = (df["temp"] > df["month"].map(monthly_avg)).astype(int)
+
+    # split data into train and test
+    split_idx = df.shape[0] - time_horizon
+    # drop() returns a new frame, so test_df keeps "temp" and "month"
+    train_df = df[:split_idx].drop(columns=["temp", "month"])
+    test_df = df[split_idx:]
+
+    return train_df, test_df
+
+
+def test_multivariate_forecast_cat(budget=5):
+    """Run a 'ts_forecast' AutoML fit with categorical regressors and print results.
+
+    The last 180 rows are held out as the test horizon. The run logs to its
+    own file so that it does not overwrite, or read back, the history written
+    by ``test_multivariate_forecast_num``.
+
+    Args:
+        budget: time budget in seconds for the AutoML run; also passed to
+            ``get_output_from_log`` when the log is read back.
+    """
+    import pickle
+
+    from flaml.data import get_output_from_log
+    from flaml.ml import sklearn_metric_loss_score
+
+    time_horizon = 180
+    train_df, test_df = load_multi_dataset_cat(time_horizon)
+    print(train_df)
+    # test dataframe must contain values for the regressors / multivariate variables
+    X_test = test_df[["timeStamp", "season", "above_monthly_avg"]]
+    y_test = test_df["demand"]
+
+    automl = AutoML()
+    settings = {
+        "time_budget": budget,  # total running time in seconds
+        "metric": "mape",  # primary metric
+        "task": "ts_forecast",  # task type
+        # separate log from the numerical test (was a copy-paste of its file name)
+        "log_file_name": "test/energy_forecast_categorical.log",  # flaml log file
+        "eval_method": "holdout",
+        "log_type": "all",
+        "label": "demand",
+    }
+    fit_kwargs = dict(dataframe=train_df, period=time_horizon, **settings)
+
+    # The main flaml automl API
+    try:
+        import prophet  # noqa: F401 -- imported only to detect whether prophet is installed
+
+        automl.fit(**fit_kwargs)
+    except ImportError:
+        print("not using prophet due to ImportError")
+        automl.fit(estimator_list=["arima", "sarimax"], **fit_kwargs)
+
+    # retrieve best config and best learner
+    print("Best ML leaner:", automl.best_estimator)
+    print("Best hyperparmeter config:", automl.best_config)
+    print(f"Best mape on validation data: {automl.best_loss}")
+    print(f"Training duration of best run: {automl.best_config_train_time}s")
+    print(automl.model.estimator)
+
+    # pickle and save the automl object
+    with open("automl.pkl", "wb") as f:
+        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
+
+    # compute predictions of testing dataset
+    y_pred = automl.predict(X_test)
+    print("Predicted labels", y_pred)
+    print("True labels", y_test)
+
+    # compute different metric values on testing dataset
+    for metric in ("mape", "rmse", "mse", "mae"):
+        print(metric, "=", sklearn_metric_loss_score(metric, y_predict=y_pred, y_true=y_test))
+
+    # only the config history is printed; the other four log outputs are unused
+    _, _, _, config_history, _ = get_output_from_log(
+        filename=settings["log_file_name"], time_budget=budget
+    )
+    for config in config_history:
+        print(config)
+    print(automl.prune_attr)
+    print(automl.max_resource)
+    print(automl.min_resource)
+
+    # Optional visualization, kept disabled so the test needs no display:
+    # import matplotlib.pyplot as plt
+    #
+    # plt.figure()
+    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
+    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
+    # plt.xlabel("Date")
+    # plt.ylabel("Energy Demand")
+    # plt.legend()
+    # plt.show()
+
+
 if __name__ == "__main__":
    test_forecast_automl(60)
+    test_multivariate_forecast_num(60)
+    test_multivariate_forecast_cat(60)