example update (#359)

Update some examples for consistency with the others.
2021-12-25 16:13:39 -08:00 · 2021-12-25 16:13:39 -08:00 · 2f5d6169d3
parent b2900f4b22
commit 2f5d6169d3
5 changed files with 58 additions and 44 deletions
--- a/flaml/automl.py
+++ b/flaml/automl.py
@ -433,10 +433,8 @@ class AutoML(BaseEstimator):
        ):
            return metric_to_minimize, metrics_to_log
        ```
-
                which returns a float number as the minimization objective,
                and a dictionary as the metrics to log. E.g.,
-
        ```python
        def custom_metric(
            X_val, y_val, estimator, labels,
@ -468,7 +466,6 @@ class AutoML(BaseEstimator):
                set it to be an empty string "".
            estimator_list: A list of strings for estimator names, or 'auto'
                e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
-
            time_budget: A float number of the time budget in seconds.
                Use -1 if no time limit.
            max_iter: An integer of the maximal number of iterations.
@ -531,7 +528,6 @@ class AutoML(BaseEstimator):
                `automl` object and use them in the `new_automl` object.
                e.g.,

-
        ```python
        from flaml import AutoML
        automl = AutoML()
@ -1717,7 +1713,6 @@ class AutoML(BaseEstimator):
                'mape'. Default is 'auto'.
                If passing a customized metric function, the function needs to
                have the following signature:
-
        ```python
        def custom_metric(
            X_test, y_test, estimator, labels,
@ -1726,33 +1721,30 @@ class AutoML(BaseEstimator):
        ):
            return metric_to_minimize, metrics_to_log
        ```
-
                which returns a float number as the minimization objective,
                and a dictionary as the metrics to log. E.g.,
+        ```python
+        def custom_metric(
+            X_val, y_val, estimator, labels,
+            X_train, y_train, weight_val=None, weight_train=None,
+            **args,
+        ):
+            from sklearn.metrics import log_loss
+            import time

-                .. code-block:: python
-
-                    def custom_metric(
-                        X_val, y_val, estimator, labels,
-                        X_train, y_train, weight_val=None, weight_train=None,
-                        **args,
-                    ):
-                        from sklearn.metrics import log_loss
-                        import time
-
-                        start = time.time()
-                        y_pred = estimator.predict_proba(X_val)
-                        pred_time = (time.time() - start) / len(X_val)
-                        val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
-                        y_pred = estimator.predict_proba(X_train)
-                        train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
-                        alpha = 0.5
-                        return val_loss * (1 + alpha) - alpha * train_loss, {
-                            "val_loss": val_loss,
-                            "train_loss": train_loss,
-                            "pred_time": pred_time,
-                        }
-
+            start = time.time()
+            y_pred = estimator.predict_proba(X_val)
+            pred_time = (time.time() - start) / len(X_val)
+            val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
+            y_pred = estimator.predict_proba(X_train)
+            train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
+            alpha = 0.5
+            return val_loss * (1 + alpha) - alpha * train_loss, {
+                "val_loss": val_loss,
+                "train_loss": train_loss,
+                "pred_time": pred_time,
+            }
+        ```
            task: A string of the task type, e.g.,
                'classification', 'regression', 'ts_forecast', 'rank',
                'seq-classification', 'seq-regression', 'summarization'
--- a/test/automl/test_classification.py
+++ b/test/automl/test_classification.py
@ -2,6 +2,7 @@ import unittest
 import numpy as np
 import scipy.sparse
 from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
 import pandas as pd
 from datetime import datetime
 from flaml import AutoML
@ -221,14 +222,28 @@ class TestClassification(unittest.TestCase):
        print(automl_experiment.best_estimator)

    def test_ray_classification(self):
-        from sklearn.datasets import make_classification
+        X, y = load_breast_cancer(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

-        X, y = make_classification(1000, 10)
        automl = AutoML()
        try:
-            automl.fit(X, y, time_budget=10, task="classification", use_ray=True)
            automl.fit(
-                X, y, time_budget=10, task="classification", n_concurrent_trials=2
+                X_train,
+                y_train,
+                X_val=X_test,
+                y_val=y_test,
+                time_budget=10,
+                task="classification",
+                use_ray=True,
+            )
+            automl.fit(
+                X_train,
+                y_train,
+                X_val=X_test,
+                y_val=y_test,
+                time_budget=10,
+                task="classification",
+                n_concurrent_trials=2,
            )
        except ImportError:
            return
--- a/test/ray/distribute_tune.py
+++ b/test/ray/distribute_tune.py
@ -1,30 +1,28 @@
 import ray
 import lightgbm as lgb
 import numpy as np
-import sklearn.datasets
-import sklearn.metrics
+from sklearn.datasets import load_breast_cancer
+from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from flaml import tune
 from flaml.model import LGBMEstimator

-data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
-train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25)
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)


 def train_breast_cancer(config):
    params = LGBMEstimator(**config).params
-    train_set = lgb.Dataset(train_x, label=train_y)
+    train_set = lgb.Dataset(X_train, label=y_train)
    gbm = lgb.train(params, train_set)
-    preds = gbm.predict(test_x)
+    preds = gbm.predict(X_test)
    pred_labels = np.rint(preds)
-    tune.report(
-        mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels), done=True
-    )
+    tune.report(mean_accuracy=accuracy_score(y_test, pred_labels), done=True)


 if __name__ == "__main__":
    ray.init(address="auto")
-    flaml_lgbm_search_space = LGBMEstimator.search_space(train_x.shape)
+    flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
    config_search_space = {
        hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
    }
--- a/test/tune_example.py
+++ b/test/tune_example.py
@ -36,6 +36,14 @@ low_cost_partial_config = {
    for hp, space in flaml_lgbm_search_space.items()
    if "low_cost_init_value" in space
 }
+# initial points to evaluate
+points_to_evaluate = [
+    {
+        hp: space["init_value"]
+        for hp, space in flaml_lgbm_search_space.items()
+        if "init_value" in space
+    }
+]
 # run the tuning, minimizing mse, with total time budget 3 seconds
 analysis = tune.run(
    train_lgbm,
@ -43,6 +51,7 @@ analysis = tune.run(
    mode="min",
    config=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
+    points_to_evaluate=points_to_evaluate,
    time_budget_s=3,
    num_samples=-1,
 )
--- a/website/docs/Getting-Started.md
+++ b/website/docs/Getting-Started.md
@ -74,7 +74,7 @@ analysis = tune.run(
    low_cost_partial_config=low_cost_partial_config, time_budget_s=3, num_samples=-1,
 )
 ```
-Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune.py) for the complete version of the above example.
+Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_example.py) for the complete version of the above example.

 ### Where to Go Next?