example update (#359)

update some examples for consistencies with others.
This commit is contained in:
Chi Wang 2021-12-25 16:13:39 -08:00 committed by GitHub
parent b2900f4b22
commit 2f5d6169d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 58 additions and 44 deletions

View File

@ -433,10 +433,8 @@ class AutoML(BaseEstimator):
):
return metric_to_minimize, metrics_to_log
```
which returns a float number as the minimization objective,
and a dictionary as the metrics to log. E.g.,
```python
def custom_metric(
X_val, y_val, estimator, labels,
@ -468,7 +466,6 @@ class AutoML(BaseEstimator):
set it to be an empty string "".
estimator_list: A list of strings for estimator names, or 'auto'
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
time_budget: A float number of the time budget in seconds.
Use -1 if no time limit.
max_iter: An integer of the maximal number of iterations.
@ -531,7 +528,6 @@ class AutoML(BaseEstimator):
`automl` object and use them in the `new_automl` object.
e.g.,
```python
from flaml import AutoML
automl = AutoML()
@ -1717,7 +1713,6 @@ class AutoML(BaseEstimator):
'mape'. Default is 'auto'.
If passing a customized metric function, the function needs to
have the follwing signature:
```python
def custom_metric(
X_test, y_test, estimator, labels,
@ -1726,33 +1721,30 @@ class AutoML(BaseEstimator):
):
return metric_to_minimize, metrics_to_log
```
which returns a float number as the minimization objective,
and a dictionary as the metrics to log. E.g.,
```python
def custom_metric(
X_val, y_val, estimator, labels,
X_train, y_train, weight_val=None, weight_train=None,
**args,
):
from sklearn.metrics import log_loss
import time
.. code-block:: python
def custom_metric(
X_val, y_val, estimator, labels,
X_train, y_train, weight_val=None, weight_train=None,
**args,
):
from sklearn.metrics import log_loss
import time
start = time.time()
y_pred = estimator.predict_proba(X_val)
pred_time = (time.time() - start) / len(X_val)
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
y_pred = estimator.predict_proba(X_train)
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
alpha = 0.5
return val_loss * (1 + alpha) - alpha * train_loss, {
"val_loss": val_loss,
"train_loss": train_loss,
"pred_time": pred_time,
}
start = time.time()
y_pred = estimator.predict_proba(X_val)
pred_time = (time.time() - start) / len(X_val)
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
y_pred = estimator.predict_proba(X_train)
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
alpha = 0.5
return val_loss * (1 + alpha) - alpha * train_loss, {
"val_loss": val_loss,
"train_loss": train_loss,
"pred_time": pred_time,
}
```
task: A string of the task type, e.g.,
'classification', 'regression', 'ts_forecast', 'rank',
'seq-classification', 'seq-regression', 'summarization'

View File

@ -2,6 +2,7 @@ import unittest
import numpy as np
import scipy.sparse
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
from datetime import datetime
from flaml import AutoML
@ -221,14 +222,28 @@ class TestClassification(unittest.TestCase):
print(automl_experiment.best_estimator)
def test_ray_classification(self):
from sklearn.datasets import make_classification
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X, y = make_classification(1000, 10)
automl = AutoML()
try:
automl.fit(X, y, time_budget=10, task="classification", use_ray=True)
automl.fit(
X, y, time_budget=10, task="classification", n_concurrent_trials=2
X_train,
y_train,
X_val=X_test,
y_val=y_test,
time_budget=10,
task="classification",
use_ray=True,
)
automl.fit(
X_train,
y_train,
X_val=X_test,
y_val=y_test,
time_budget=10,
task="classification",
n_concurrent_trials=2,
)
except ImportError:
return

View File

@ -1,30 +1,28 @@
import ray
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from flaml import tune
from flaml.model import LGBMEstimator
data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25)
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
def train_breast_cancer(config):
params = LGBMEstimator(**config).params
train_set = lgb.Dataset(train_x, label=train_y)
train_set = lgb.Dataset(X_train, label=y_train)
gbm = lgb.train(params, train_set)
preds = gbm.predict(test_x)
preds = gbm.predict(X_test)
pred_labels = np.rint(preds)
tune.report(
mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels), done=True
)
tune.report(mean_accuracy=accuracy_score(y_test, pred_labels), done=True)
if __name__ == "__main__":
ray.init(address="auto")
flaml_lgbm_search_space = LGBMEstimator.search_space(train_x.shape)
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}

View File

@ -36,6 +36,14 @@ low_cost_partial_config = {
for hp, space in flaml_lgbm_search_space.items()
if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [
{
hp: space["init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "init_value" in space
}
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(
train_lgbm,
@ -43,6 +51,7 @@ analysis = tune.run(
mode="min",
config=config_search_space,
low_cost_partial_config=low_cost_partial_config,
points_to_evaluate=points_to_evaluate,
time_budget_s=3,
num_samples=-1,
)

View File

@ -74,7 +74,7 @@ analysis = tune.run(
low_cost_partial_config=low_cost_partial_config, time_budget_s=3, num_samples=-1,
)
```
Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune.py) for the complete version of the above example.
Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_example.py) for the complete version of the above example.
### Where to Go Next?