# autogen/test/test_forecast.py

def test_forecast_automl_df(budget=5):
    """Run a FLAML forecast search on the CO2 series passed as one dataframe.

    The statsmodels CO2 dataset is resampled to month-start means, gaps are
    back-filled, and the columns are renamed to ``ds`` / ``y``. The last 12
    rows are held out; the rest is handed to ``AutoML.fit`` through the
    ``dataframe`` argument. The fitted object is pickled to ``automl.pkl``,
    predictions for the held-out timestamps are printed together with their
    MAPE, and the configurations recorded in the log file are printed.

    Args:
        budget: Value passed as ``time_budget`` to ``AutoML.fit`` and to
            ``get_output_from_log`` (documented in the settings as seconds).
    """
    import pickle

    import statsmodels.api as sm
    from flaml import AutoML
    from flaml.data import get_output_from_log
    from flaml.ml import sklearn_metric_loss_score

    # Monthly means of the raw CO2 readings; bfill() is equivalent to the
    # former fillna(series.bfill()) because both share the same index.
    monthly = sm.datasets.co2.load_pandas().data['co2'].resample('MS').mean()
    monthly = monthly.bfill()
    data = monthly.to_frame().reset_index()
    data = data.rename(columns={'index': 'ds', 'co2': 'y'})

    # Hold out the final `time_horizon` rows for testing.
    time_horizon = 12
    split_idx = data.shape[0] - time_horizon
    X_train = data[:split_idx]
    X_test = data[split_idx:]['ds'].to_frame()
    y_test = data[split_idx:]['y'].to_frame()

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": 'mape',  # primary metric
        "task": 'forecast',  # task type
        "log_file_name": 'CO2_forecast.log',  # flaml log file
        "eval_method": "holdout",
        "split_type": 'time'
    }
    fit_kwargs = dict(settings, period=time_horizon, freq='M')

    # The main flaml automl API.
    try:
        automl.fit(dataframe=X_train, **fit_kwargs)
    except ImportError:
        # NOTE(review): presumably raised when an optional forecasting
        # backend is not installed; retry with the explicit estimator list.
        automl.fit(dataframe=X_train, estimator_list=['arima', 'sarimax'], **fit_kwargs)

    # Retrieve best config and best learner.
    print('Best ML learner:', automl.best_estimator)
    print('Best hyperparameter config:', automl.best_config)
    print(f'Best mape on validation data: {automl.best_loss}')
    print(f'Training duration of best run: {automl.best_config_train_time}s')
    print(automl.model.estimator)

    # Pickle and save the automl object.
    with open('automl.pkl', 'wb') as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)

    # Compute predictions of testing dataset.
    y_pred = automl.predict(X_test)
    print('Predicted labels', y_pred)
    print('True labels', y_test)

    # Compute the metric value on the testing dataset.
    print('mape', '=', sklearn_metric_loss_score('mape', y_pred, y_test))

    # Only the config history is used; the 5-way unpacking is kept so a change
    # in the shape of the returned tuple still fails loudly here.
    _, _, _, config_history, _ = get_output_from_log(
        filename=settings['log_file_name'], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.prune_attr)
    print(automl.max_resource)
    print(automl.min_resource)


def test_forecast_automl_Xy(budget=5):
    """Run a FLAML forecast search on the CO2 series passed as X/y.

    The statsmodels CO2 dataset is resampled to month-start means and gaps
    are back-filled. The last 12 rows are held out; the timestamp column
    (``index``) of the remaining rows is passed as ``X_train`` and the
    ``co2`` column as ``y_train`` to ``AutoML.fit``. The fitted object is
    pickled to ``automl.pkl``, predictions for the held-out timestamps are
    printed together with their MAPE, and the configurations recorded in the
    log file are printed.

    Args:
        budget: Value passed as ``time_budget`` to ``AutoML.fit`` and to
            ``get_output_from_log`` (documented in the settings as seconds).
    """
    import pickle

    import statsmodels.api as sm
    from flaml import AutoML
    from flaml.data import get_output_from_log
    from flaml.ml import sklearn_metric_loss_score

    # Monthly means of the raw CO2 readings; bfill() is equivalent to the
    # former fillna(series.bfill()) because both share the same index.
    monthly = sm.datasets.co2.load_pandas().data['co2'].resample('MS').mean()
    monthly = monthly.bfill()
    data = monthly.to_frame().reset_index()

    # Hold out the final `time_horizon` rows for testing.
    time_horizon = 12
    split_idx = data.shape[0] - time_horizon
    X_train = data[:split_idx]['index'].to_frame()
    y_train = data[:split_idx]['co2']
    X_test = data[split_idx:]['index'].to_frame()
    y_test = data[split_idx:]['co2'].to_frame()

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": 'mape',  # primary metric
        "task": 'forecast',  # task type
        "log_file_name": 'CO2_forecast.log',  # flaml log file
        "eval_method": "holdout",
        "split_type": 'time'
    }
    fit_kwargs = dict(settings, period=time_horizon, freq='M')

    # The main flaml automl API.
    try:
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
    except ImportError:
        # NOTE(review): presumably raised when an optional forecasting
        # backend is not installed; retry with the explicit estimator list.
        automl.fit(X_train=X_train, y_train=y_train,
                   estimator_list=['arima', 'sarimax'], **fit_kwargs)

    # Retrieve best config and best learner.
    print('Best ML learner:', automl.best_estimator)
    print('Best hyperparameter config:', automl.best_config)
    print(f'Best mape on validation data: {automl.best_loss}')
    print(f'Training duration of best run: {automl.best_config_train_time}s')
    print(automl.model.estimator)

    # Pickle and save the automl object.
    with open('automl.pkl', 'wb') as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)

    # Compute predictions of testing dataset.
    y_pred = automl.predict(X_test)
    print('Predicted labels', y_pred)
    print('True labels', y_test)

    # Compute the metric value on the testing dataset.
    print('mape', '=', sklearn_metric_loss_score('mape', y_pred, y_test))

    # Only the config history is used; the 5-way unpacking is kept so a change
    # in the shape of the returned tuple still fails loudly here.
    _, _, _, config_history, _ = get_output_from_log(
        filename=settings['log_file_name'], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.prune_attr)
    print(automl.max_resource)
    print(automl.min_resource)


if __name__ == "__main__":
    # Direct execution runs both forecast tests, each with a budget of 60
    # instead of the default of 5.
    for forecast_test in (test_forecast_automl_df, test_forecast_automl_Xy):
        forecast_test(60)