# autogen/test/test_model.py

from sklearn.datasets import make_classification
import numpy as np
from pandas import DataFrame
from datetime import datetime
from flaml.model import (
    KNeighborsEstimator,
    LRL2Classifier,
    BaseEstimator,
    LGBMEstimator,
    CatBoostEstimator,
    XGBoostEstimator,
    RandomForestEstimator,
    Prophet,
    ARIMA,
    LGBM_TS,
)


def test_lrl2():
    """Exercise ``LRL2Classifier`` on a large synthetic dataset with a tiny budget.

    The test first calls ``BaseEstimator.search_space`` with placeholder
    arguments, then generates a 100000 x 1000 classification problem, calls
    ``predict`` on the not-yet-fitted classifier, and finally fits it with
    ``budget=1e-5``. Nothing is asserted; the test passes as long as no call
    raises.
    """
    BaseEstimator.search_space(1, "")

    features, labels = make_classification(n_samples=100000, n_features=1000)
    print("start")

    classifier = LRL2Classifier()
    # predict() is deliberately invoked before fit() to cover the unfitted path.
    classifier.predict(features)
    # NOTE(review): presumably the tiny budget forces an early stop -- confirm
    # against the estimator's fit() implementation.
    classifier.fit(features, labels, budget=1e-5)


def test_prep():
    """Smoke-test several flaml estimators on a tiny mixed-type dataset.

    A 14-row object array is built from one float column and one column that
    mixes strings with floats. Each tabular estimator is fitted on it and its
    ``feature_names_in_`` / ``feature_importances_`` are printed. ``Prophet``
    and ``ARIMA`` are then probed through ``predict`` without being fitted,
    and ``LGBM_TS`` is run on a single-column datetime frame. Nothing is
    asserted; the test passes as long as no unexpected exception escapes.
    """

    def show_features(estimator):
        # Print the two feature attributes in the same order for every model.
        print(estimator.feature_names_in_)
        print(estimator.feature_importances_)

    numeric_column = [
        3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0,
        12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0,
    ]
    # Second column intentionally mixes str and float values.
    mixed_column = [
        "a", "b", "a", "c", "c", "b", "b",
        "b", "b", "a", "b", 1.0, 1.0, "a",
    ]
    features = np.array(list(zip(numeric_column, mixed_column)), dtype=object)
    labels = np.array([0] * 7 + [1] * 7)

    logistic = LRL2Classifier()
    logistic.fit(features, labels)
    logistic.predict(features)
    show_features(logistic)

    lightgbm_model = LGBMEstimator(n_estimators=4)
    lightgbm_model.fit(features, labels)
    show_features(lightgbm_model)

    catboost_model = CatBoostEstimator(n_estimators=4)
    catboost_model.fit(features, labels)
    show_features(catboost_model)

    neighbors = KNeighborsEstimator(task="regression")
    neighbors.fit(features, labels)
    show_features(neighbors)

    xgboost_model = XGBoostEstimator(n_estimators=4, max_leaves=4)
    xgboost_model.fit(features, labels)
    xgboost_model.predict(features)
    show_features(xgboost_model)

    forest = RandomForestEstimator(
        task="regression",
        n_estimators=4,
        criterion="gini",
    )
    forest.fit(features, labels)
    show_features(forest)

    # Prophet is never fitted here; only its predict() entry points are probed.
    prophet_model = Prophet()
    try:
        prophet_model.predict(4)
    except ValueError:
        # predict() with steps is only supported for arima/sarimax.
        pass
    prophet_model.predict(features)

    # ARIMA: predict once as constructed, then again with _model forced to False.
    arima_model = ARIMA()
    arima_model.predict(features)
    arima_model._model = False
    try:
        arima_model.predict(features)
    except ValueError:
        # X_test needs to be either a pandas Dataframe with dates as the first column or an int number of periods for predict().
        pass

    ts_model = LGBM_TS(optimize_for_horizon=True, lags=1)
    month_day_pairs = ((2, 3), (3, 4), (3, 4), (3, 4), (7, 2), (8, 9))
    ts_frame = DataFrame(
        {"A": [datetime(1900, month, day) for month, day in month_day_pairs]}
    )
    ts_labels = np.array([0, 1, 0, 1, 0, 0])
    # predict() is called both before and after fit().
    ts_model.predict(ts_frame[:2])
    ts_model.fit(ts_frame, ts_labels, period=2)
    ts_model.predict(ts_frame[:2])
    show_features(ts_model)


# When executed directly as a script, run only test_prep(); test_lrl2() is
# not invoked from here.
if __name__ == "__main__":
    test_prep()
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`from sklearn.datasets import make_classification`
			`import numpy as np`
			`from pandas import DataFrame`
			`from datetime import datetime`
			`from flaml.model import (`
			`KNeighborsEstimator,`
			`LRL2Classifier,`
			`BaseEstimator,`
			`LGBMEstimator,`
			`CatBoostEstimator,`
			`XGBoostEstimator,`
			`RandomForestEstimator,`
			`Prophet,`
			`ARIMA,`
Support time series forecasting for discrete target variable (#416) * support 'ts_forecast_classification' task to forecast discrete values * update test_forecast.py - add test for forecasting discrete values * update test_model.py * pre-commit changes 2022-01-25 10:39:36 +08:00			`LGBM_TS,`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`)`


			`def test_lrl2():`
			`BaseEstimator.search_space(1, "")`
			`X, y = make_classification(100000, 1000)`
			`print("start")`
			`lr = LRL2Classifier()`
			`lr.predict(X)`
			`lr.fit(X, y, budget=1e-5)`


			`def test_prep():`
			`X = np.array(`
			`list(`
			`zip(`
			`[`
			`3.0,`
			`16.0,`
			`10.0,`
			`12.0,`
			`3.0,`
			`14.0,`
			`11.0,`
			`12.0,`
			`5.0,`
			`14.0,`
			`20.0,`
			`16.0,`
			`15.0,`
			`11.0,`
			`],`
			`[`
			`"a",`
			`"b",`
			`"a",`
			`"c",`
			`"c",`
			`"b",`
			`"b",`
			`"b",`
			`"b",`
			`"a",`
			`"b",`
			`1.0,`
			`1.0,`
			`"a",`
			`],`
			`)`
			`),`
			`dtype=object,`
			`)`
			`y = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])`
			`lr = LRL2Classifier()`
			`lr.fit(X, y)`
			`lr.predict(X)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(lr.feature_names_in_)`
			`print(lr.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`lgbm = LGBMEstimator(n_estimators=4)`
			`lgbm.fit(X, y)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(lgbm.feature_names_in_)`
			`print(lgbm.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`cat = CatBoostEstimator(n_estimators=4)`
			`cat.fit(X, y)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(cat.feature_names_in_)`
			`print(cat.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`knn = KNeighborsEstimator(task="regression")`
			`knn.fit(X, y)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(knn.feature_names_in_)`
			`print(knn.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`xgb = XGBoostEstimator(n_estimators=4, max_leaves=4)`
			`xgb.fit(X, y)`
			`xgb.predict(X)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(xgb.feature_names_in_)`
			`print(xgb.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`rf = RandomForestEstimator(task="regression", n_estimators=4, criterion="gini")`
			`rf.fit(X, y)`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(rf.feature_names_in_)`
			`print(rf.feature_importances_)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00
			`prophet = Prophet()`
			`try:`
			`prophet.predict(4)`
			`except ValueError:`
			`# predict() with steps is only supported for arima/sarimax.`
			`pass`
			`prophet.predict(X)`

			`arima = ARIMA()`
			`arima.predict(X)`
			`arima._model = False`
			`try:`
			`arima.predict(X)`
			`except ValueError:`
			`# X_test needs to be either a pandas Dataframe with dates as the first column or an int number of periods for predict().`
			`pass`

Support time series forecasting for discrete target variable (#416) * support 'ts_forecast_classification' task to forecast discrete values * update test_forecast.py - add test for forecasting discrete values * update test_model.py * pre-commit changes 2022-01-25 10:39:36 +08:00			`lgbm = LGBM_TS(optimize_for_horizon=True, lags=1)`
fix issues in logging, bug in space.py, constraint sign, and improve code coverage (#388) * console log handler * version update * doc * skippable steps * notebook update * constraint sign * doc for constraints * bug fix: define-by-run and unflatten_hierarchical * const * handle nested space in indexof() * test grid search * test suggestion * model test * >1 ckpts * always increase iter count * log total # iterations * security patch * make iter_per_learner consistent 2022-01-15 05:39:09 +08:00			`X = DataFrame(`
			`{`
			`"A": [`
			`datetime(1900, 2, 3),`
			`datetime(1900, 3, 4),`
			`datetime(1900, 3, 4),`
			`datetime(1900, 3, 4),`
			`datetime(1900, 7, 2),`
			`datetime(1900, 8, 9),`
			`],`
			`}`
			`)`
			`y = np.array([0, 1, 0, 1, 0, 0])`
			`lgbm.predict(X[:2])`
			`lgbm.fit(X, y, period=2)`
			`lgbm.predict(X[:2])`
Feature names and importances (#621) * feature names and importances * None check * StackingClassifier has no feature_importances_ * StackingClassifier has no feature_names_in_ 2022-07-11 03:25:59 +08:00			`print(lgbm.feature_names_in_)`
			`print(lgbm.feature_importances_)`


			`if __name__ == "__main__":`
			`test_prep()`