Bug fix and add documentation for metric_constraints (#498)
* metric constraint documentation
* update link
* update notebook
* fix a bug in adding 'time_total_s' to result
* use the default multiple factor from config file
* update notebook
* format
* improve test
* revise test budget for macos
* bug fix in adding time_total_s
* increase performance check budget
* revise test
* update notebook
* uncomment test
* remove redundancy
* clear output
* remove n_jobs
* remove constraint in notebook
* increase budget
* revise test
* add python version
* use getattr
* improve code robustness

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
parent 72301b8568
commit 6c16e47e42
@@ -115,6 +115,7 @@ class SearchState:
         self._hp_names = list(self._search_space_domain.keys())
         self.search_alg = None
         self.best_config = None
+        self.best_result = None
         self.best_loss = self.best_loss_old = np.inf
         self.total_time_used = 0
         self.total_iter = 0
@@ -157,6 +158,7 @@ class SearchState:
         if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
             self.best_loss_old = self.best_loss if self.best_loss < np.inf else 2 * obj
             self.best_loss = obj
+            self.best_result = result
             self.time_best_found_old = self.time_best_found
             self.time_best_found = self.total_time_used
             self.iter_best_found = self.total_iter
@@ -535,12 +537,12 @@ class AutoML(BaseEstimator):
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
                 metric, the second element is the inequality sign chosen from ">=" and "<=",
-                and the third element is the constraint value. E.g., `('precision', '>=', 0.9)`.
+                and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
                 Note that all the metric names in metric_constraints need to be reported via
                 the metrics_to_log dictionary returned by a customized metric function.
                 The customized metric function shall be provided via the `metric` key word
                 argument of the fit() function or the automl constructor.
-                Find examples in this [test](https://github.com/microsoft/FLAML/tree/main/test/automl/test_constraints.py).
+                Find an example in the 4th constraint type in this [doc](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#constraint).
                 If `pred_time_limit` is provided as one of keyword arguments to fit() function or
                 the automl constructor, flaml will automatically (and under the hood)
                 add it as an additional element in the metric_constraints. Essentially 'pred_time_limit'
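For quick reference, the constraint format documented in the docstring above boils down to a list of `(metric_name, inequality, threshold)` tuples. A minimal, self-contained sketch (the metric names and thresholds below are illustrative, not taken from this commit):

```python
# Illustrative sketch of the documented metric_constraints format.
# The metric names and thresholds are made up for the example; the constrained
# metrics must appear in the metrics_to_log dict returned by a custom metric function.
metric_constraints = [
    ("pred_time", "<=", 1e-3),
    ("val_train_loss_gap", "<=", 0.05),
]

for name, sign, threshold in metric_constraints:
    assert sign in (">=", "<="), "the second element must be '>=' or '<='"
    print(f"constraint: {name} {sign} {threshold}")
```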
@@ -658,6 +660,22 @@ class AutoML(BaseEstimator):
         """A float of the best loss found."""
         return self._state.best_loss
 
+    @property
+    def best_result(self):
+        """Result dictionary for model trained with the best config."""
+        state = self._search_states.get(self._best_estimator)
+        return state and getattr(state, "best_result", None)
+
+    @property
+    def metrics_for_best_config(self):
+        """Returns a float of the best loss, and a dictionary of the auxiliary metrics to log
+        associated with the best config. These two objects correspond to the returned
+        objects by the customized metric function for the config with the best loss."""
+        state = self._search_states.get(self._best_estimator)
+        return self._state.best_loss, state and getattr(state, "best_result", {}).get(
+            "metric_for_logging"
+        )
+
     @property
     def best_config_train_time(self):
         """A float of the seconds taken by training the best config."""
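A brief usage sketch of the two properties added above. The dataset, time budget, and estimator choice here are illustrative and not part of this commit; the auxiliary-metrics element is typically only populated when a custom metric function returns a metrics_to_log dictionary, so with a built-in metric it may be empty or None:

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X, y = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(X, y, task="classification", time_budget=3, estimator_list=["lgbm"])

# New in this commit: best_result and metrics_for_best_config.
best_loss, metrics_to_log = automl.metrics_for_best_config
print("best loss:", best_loss)
print("auxiliary metrics for the best config:", metrics_to_log)
print("full result dict for the best config:", automl.best_result)
```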
@@ -18,6 +18,7 @@ except (ImportError, AssertionError):
     from ..tune import sample
     from ..tune.trial import flatten_dict, unflatten_dict
 from flaml.tune.sample import _BackwardsCompatibleNumpyRng
+from flaml.config import SAMPLE_MULTIPLY_FACTOR
 from ..tune.space import (
     complete_config,
     denormalize,
@@ -43,7 +44,7 @@ class FLOW2(Searcher):
         resource_attr: Optional[str] = None,
         min_resource: Optional[float] = None,
         max_resource: Optional[float] = None,
-        resource_multiple_factor: Optional[float] = 4,
+        resource_multiple_factor: Optional[float] = None,
         cost_attr: Optional[str] = "time_total_s",
         seed: Optional[int] = 20,
     ):
@@ -91,7 +92,7 @@ class FLOW2(Searcher):
         self.best_config = flatten_dict(init_config)
         self.resource_attr = resource_attr
         self.min_resource = min_resource
-        self.resource_multiple_factor = resource_multiple_factor or 4
+        self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
         self.cost_attr = cost_attr
         self.max_resource = max_resource
         self._resource = None
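The two hunks above replace the hard-coded default of 4 with `SAMPLE_MULTIPLY_FACTOR` imported from `flaml.config`. As a rough illustration of what such a multiple factor does in a resource (e.g., sample-size) schedule, each rung multiplies the previous one until the full resource is reached; the concrete numbers below are assumptions for the example, not values from this diff:

```python
# Illustrative sketch only: a geometric resource schedule driven by a multiple factor.
def resource_schedule(min_resource, max_resource, factor):
    resource, rungs = min_resource, []
    while resource < max_resource:
        rungs.append(resource)
        resource *= factor
    rungs.append(max_resource)
    return rungs

print(resource_schedule(10_000, 1_000_000, 4))
# -> [10000, 40000, 160000, 640000, 1000000]
```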
@@ -82,6 +82,8 @@ class BaseTrialRunner:
 
     def process_trial_result(self, trial, result):
         trial.update_last_result(result)
+        if "time_total_s" not in result.keys():
+            result["time_total_s"] = trial.last_update_time - trial.start_time
         self._search_alg.on_trial_result(trial.trial_id, result)
         if self._scheduler_alg:
             decision = self._scheduler_alg.on_trial_result(self, trial, result)
@@ -105,7 +105,6 @@ def report(_metric=None, **kwargs):
     for key, value in trial.config.items():
         result["config/" + key] = value
     _runner.process_trial_result(trial, result)
-    result["time_total_s"] = trial.last_update_time - trial.start_time
     if _verbose > 2:
         logger.info(f"result: {result}")
     if trial.is_finished():
File diff suppressed because one or more lines are too long
@@ -97,7 +97,11 @@ def custom_metric(

def test_metric_constraints_custom():
    automl = AutoML()

    # When you are providing a custom metric function, you can also specify constraints
    # on one or more of the metrics reported via the second object, i.e., a metrics_to_log dictionary,
    # returned by the custom metric function.
    # For example, in the following code, we add a constraint on the `pred_time` metrics and `val_train_loss_gap` metric
    # reported in `custom_metric` defined above, respectively.
    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
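The `custom_metric` function referenced in the comments above is defined earlier in the test file and does not appear in this hunk. A hedged sketch of what such a function might look like, based on FLAML's custom-metric interface (the exact signature and the computed metrics here are assumptions, not lines from this diff):

```python
import time

from sklearn.metrics import log_loss


def custom_metric(
    X_val, y_val, estimator, labels,
    X_train, y_train, weight_val=None, weight_train=None,
    *args,
):
    """Return (metric_to_minimize, metrics_to_log); metrics_to_log feeds metric_constraints."""
    start = time.time()
    y_pred_val = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred_val, labels=labels, sample_weight=weight_val)
    y_pred_train = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred_train, labels=labels, sample_weight=weight_train)
    # These keys are what the constraints in this test refer to.
    metrics_to_log = {
        "pred_time": pred_time,
        "val_train_loss_gap": val_loss - train_loss,
    }
    return val_loss, metrics_to_log
```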
@@ -128,6 +132,22 @@ def test_metric_constraints_custom():
     print(automl.estimator_list)
     print(automl.search_space)
     print(automl.points_to_evaluate)
+    print(
+        "Best minimization objective on validation data: {0:.4g}".format(
+            automl.best_loss
+        )
+    )
+    print(
+        "pred_time of the best config on validation data: {0:.4g}".format(
+            automl.metrics_for_best_config[1]["pred_time"]
+        )
+    )
+    print(
+        "val_train_loss_gap of the best config on validation data: {0:.4g}".format(
+            automl.metrics_for_best_config[1]["val_train_loss_gap"]
+        )
+    )
+
     config = automl.best_config.copy()
     config["learner"] = automl.best_estimator
     automl.trainable(config)
@@ -1,3 +1,4 @@
+import sys
 from openml.exceptions import OpenMLServerException
 from requests.exceptions import ChunkedEncodingError
 
@@ -6,6 +7,14 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     from flaml.data import load_openml_dataset
     import urllib3
 
+    performance_check_budget = 240
+    if (
+        sys.platform == "darwin"
+        and budget < performance_check_budget
+        and dataset_format == "dataframe"
+        and "3.9" in sys.version
+    ):
+        budget = performance_check_budget  # revise the budget on macOS
     try:
         X_train, X_test, y_train, y_test = load_openml_dataset(
             dataset_id=1169, data_dir="test/", dataset_format=dataset_format
@@ -53,11 +62,14 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     """ compute different metric values on testing dataset """
     from flaml.ml import sklearn_metric_loss_score
 
-    print("accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test))
+    accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
+    print("accuracy", "=", accuracy)
     print(
         "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
     )
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
+    if budget >= performance_check_budget:
+        assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
     from flaml.data import get_output_from_log
 
     (
@@ -128,4 +140,4 @@ def test_mlflow():
 
 
 if __name__ == "__main__":
-    test_automl(120)
+    test_automl(240)
@@ -75,9 +75,8 @@ def _test_flaml_raytune_consistency(
     )
     flaml_best_config = analysis.best_config
     flaml_config_in_results = [v["config"] for v in analysis.results.values()]
+    flaml_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
     print(analysis.best_trial.last_result)  # the best trial's result
-    print("best flaml", searcher_name, flaml_best_config)  # the best config
-    print("flaml config in results", searcher_name, flaml_config_in_results)
 
     np.random.seed(100)
     searcher = setup_searcher(searcher_name)
@@ -97,8 +96,16 @@ def _test_flaml_raytune_consistency(
     )
     ray_best_config = analysis.best_config
     ray_config_in_results = [v["config"] for v in analysis.results.values()]
+    ray_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
+
     print(analysis.best_trial.last_result)  # the best trial's result
-    print("ray best", searcher_name, analysis.best_config)  # the best config
+    print("time_total_s in flaml", flaml_time_in_results)  # the best trial's result
+    print("time_total_s in ray", ray_time_in_results)  # the best trial's result
+
+    print("best flaml", searcher_name, flaml_best_config)  # the best config
+    print("ray best", searcher_name, ray_best_config)  # the best config
+
+    print("flaml config in results", searcher_name, flaml_config_in_results)
     print("ray config in results", searcher_name, ray_config_in_results)
     assert ray_best_config == flaml_best_config, "best config should be the same"
     assert (
@@ -59,4 +59,4 @@ print(automl.model.estimator)
 
 ### A more advanced example including custom learner and metric
 
-[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/flaml_automl.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/flaml_automl.ipynb)
+[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_classification.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_classification.ipynb)
@@ -267,6 +267,21 @@ For example,
 ```python
 automl.fit(X_train, y_train, max_iter=100, train_time_limit=1, pred_time_limit=1e-3)
 ```
+4. Constraints on the metrics of the ML model tried in AutoML.
+
+When users provide a [custom metric function](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric), which returns a primary optimization metric and a dictionary of additional metrics (typically also about the model) to log, users can also specify constraints on one or more of the metrics in the dictionary of additional metrics.
+
+Users need to provide a list of such constraints in the following format:
+Each element in this list is a 3-tuple, which shall be expressed
+in the following format: the first element of the 3-tuple is the name of the
+metric, the second element is the inequality sign chosen from ">=" and "<=",
+and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
+
+For example,
+```python
+metric_constraints = [("train_loss", "<=", 0.1), ("val_loss", "<=", 0.1)]
+automl.fit(X_train, y_train, max_iter=100, train_time_limit=1, metric_constraints=metric_constraints)
+```
+
 ### Ensemble
 