Bug fix and add documentation for metric_constraints (#498)
* metric constraint documentation
* update link
* update notebook
* fix a bug in adding 'time_total_s' to result
* use the default multiple factor from config file
* update notebook
* format
* improve test
* revise test budget for macos
* bug fix in adding time_total_s
* increase performance check budget
* revise test
* update notebook
* uncomment test
* remove redundancy
* clear output
* remove n_jobs
* remove constraint in notebook
* increase budget
* revise test
* add python version
* use getattr
* improve code robustness

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
parent 72301b8568
commit 6c16e47e42
@@ -115,6 +115,7 @@ class SearchState:
         self._hp_names = list(self._search_space_domain.keys())
         self.search_alg = None
         self.best_config = None
+        self.best_result = None
         self.best_loss = self.best_loss_old = np.inf
         self.total_time_used = 0
         self.total_iter = 0
@@ -157,6 +158,7 @@ class SearchState:
         if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
             self.best_loss_old = self.best_loss if self.best_loss < np.inf else 2 * obj
             self.best_loss = obj
+            self.best_result = result
             self.time_best_found_old = self.time_best_found
             self.time_best_found = self.total_time_used
             self.iter_best_found = self.total_iter
@@ -535,12 +537,12 @@ class AutoML(BaseEstimator):
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
                 metric, the second element is the inequality sign chosen from ">=" and "<=",
-                and the third element is the constraint value. E.g., `('precision', '>=', 0.9)`.
+                and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
                 Note that all the metric names in metric_constraints need to be reported via
                 the metrics_to_log dictionary returned by a customized metric function.
                 The customized metric function shall be provided via the `metric` key word
                 argument of the fit() function or the automl constructor.
-                Find examples in this [test](https://github.com/microsoft/FLAML/tree/main/test/automl/test_constraints.py).
+                Find an example in the 4th constraint type in this [doc](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#constraint).
                 If `pred_time_limit` is provided as one of keyword arguments to fit() function or
                 the automl constructor, flaml will automatically (and under the hood)
                 add it as an additional element in the metric_constraints. Essentially 'pred_time_limit'
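For quick reference, the constraint format documented in the docstring above boils down to a list of `(metric_name, inequality, threshold)` tuples. A minimal, self-contained sketch (the metric names and thresholds below are illustrative, not taken from this commit):

```python
# Illustrative sketch of the documented metric_constraints format.
# The metric names and thresholds are made up for the example; the constrained
# metrics must appear in the metrics_to_log dict returned by a custom metric function.
metric_constraints = [
    ("pred_time", "<=", 1e-3),
    ("val_train_loss_gap", "<=", 0.05),
]

for name, sign, threshold in metric_constraints:
    assert sign in (">=", "<="), "the second element must be '>=' or '<='"
    print(f"constraint: {name} {sign} {threshold}")
```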
@@ -658,6 +660,22 @@ class AutoML(BaseEstimator):
         """A float of the best loss found."""
         return self._state.best_loss
 
+    @property
+    def best_result(self):
+        """Result dictionary for model trained with the best config."""
+        state = self._search_states.get(self._best_estimator)
+        return state and getattr(state, "best_result", None)
+
+    @property
+    def metrics_for_best_config(self):
+        """Returns a float of the best loss, and a dictionary of the auxiliary metrics to log
+        associated with the best config. These two objects correspond to the returned
+        objects by the customized metric function for the config with the best loss."""
+        state = self._search_states.get(self._best_estimator)
+        return self._state.best_loss, state and getattr(state, "best_result", {}).get(
+            "metric_for_logging"
+        )
+
     @property
     def best_config_train_time(self):
         """A float of the seconds taken by training the best config."""
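A brief usage sketch of the two properties added above. The dataset, time budget, and estimator choice here are illustrative and not part of this commit; the auxiliary-metrics element is typically only populated when a custom metric function returns a metrics_to_log dictionary, so with a built-in metric it may be empty or None:

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X, y = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(X, y, task="classification", time_budget=3, estimator_list=["lgbm"])

# New in this commit: best_result and metrics_for_best_config.
best_loss, metrics_to_log = automl.metrics_for_best_config
print("best loss:", best_loss)
print("auxiliary metrics for the best config:", metrics_to_log)
print("full result dict for the best config:", automl.best_result)
```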
@@ -18,6 +18,7 @@ except (ImportError, AssertionError):
     from ..tune import sample
     from ..tune.trial import flatten_dict, unflatten_dict
 from flaml.tune.sample import _BackwardsCompatibleNumpyRng
+from flaml.config import SAMPLE_MULTIPLY_FACTOR
 from ..tune.space import (
     complete_config,
     denormalize,
@@ -43,7 +44,7 @@ class FLOW2(Searcher):
         resource_attr: Optional[str] = None,
         min_resource: Optional[float] = None,
         max_resource: Optional[float] = None,
-        resource_multiple_factor: Optional[float] = 4,
+        resource_multiple_factor: Optional[float] = None,
         cost_attr: Optional[str] = "time_total_s",
         seed: Optional[int] = 20,
     ):
@@ -91,7 +92,7 @@ class FLOW2(Searcher):
         self.best_config = flatten_dict(init_config)
         self.resource_attr = resource_attr
         self.min_resource = min_resource
-        self.resource_multiple_factor = resource_multiple_factor or 4
+        self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
         self.cost_attr = cost_attr
         self.max_resource = max_resource
         self._resource = None
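The two hunks above replace the hard-coded default of 4 with `SAMPLE_MULTIPLY_FACTOR` imported from `flaml.config`. As a rough illustration of what such a multiple factor does in a resource (e.g., sample-size) schedule, each rung multiplies the previous one until the full resource is reached; the concrete numbers below are assumptions for the example, not values from this diff:

```python
# Illustrative sketch only: a geometric resource schedule driven by a multiple factor.
def resource_schedule(min_resource, max_resource, factor):
    resource, rungs = min_resource, []
    while resource < max_resource:
        rungs.append(resource)
        resource *= factor
    rungs.append(max_resource)
    return rungs

print(resource_schedule(10_000, 1_000_000, 4))
# -> [10000, 40000, 160000, 640000, 1000000]
```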
@@ -82,6 +82,8 @@ class BaseTrialRunner:
 
     def process_trial_result(self, trial, result):
         trial.update_last_result(result)
+        if "time_total_s" not in result.keys():
+            result["time_total_s"] = trial.last_update_time - trial.start_time
         self._search_alg.on_trial_result(trial.trial_id, result)
         if self._scheduler_alg:
             decision = self._scheduler_alg.on_trial_result(self, trial, result)
@@ -105,7 +105,6 @@ def report(_metric=None, **kwargs):
     for key, value in trial.config.items():
         result["config/" + key] = value
     _runner.process_trial_result(trial, result)
-    result["time_total_s"] = trial.last_update_time - trial.start_time
     if _verbose > 2:
         logger.info(f"result: {result}")
     if trial.is_finished():
File diff suppressed because one or more lines are too long
@@ -97,7 +97,11 @@ def custom_metric(

def test_metric_constraints_custom():
    automl = AutoML()

    # When you are providing a custom metric function, you can also specify constraints
    # on one or more of the metrics reported via the second object, i.e., a metrics_to_log dictionary,
    # returned by the custom metric function.
    # For example, in the following code, we add a constraint on the `pred_time` metrics and `val_train_loss_gap` metric
    # reported in `custom_metric` defined above, respectively.
    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
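The `custom_metric` function referenced in the comments above is defined earlier in the test file and does not appear in this hunk. A hedged sketch of what such a function might look like, based on FLAML's custom-metric interface (the exact signature and the computed metrics here are assumptions, not lines from this diff):

```python
import time

from sklearn.metrics import log_loss


def custom_metric(
    X_val, y_val, estimator, labels,
    X_train, y_train, weight_val=None, weight_train=None,
    *args,
):
    """Return (metric_to_minimize, metrics_to_log); metrics_to_log feeds metric_constraints."""
    start = time.time()
    y_pred_val = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred_val, labels=labels, sample_weight=weight_val)
    y_pred_train = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred_train, labels=labels, sample_weight=weight_train)
    # These keys are what the constraints in this test refer to.
    metrics_to_log = {
        "pred_time": pred_time,
        "val_train_loss_gap": val_loss - train_loss,
    }
    return val_loss, metrics_to_log
```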
@@ -128,6 +132,22 @@ def test_metric_constraints_custom():
     print(automl.estimator_list)
     print(automl.search_space)
     print(automl.points_to_evaluate)
+    print(
+        "Best minimization objective on validation data: {0:.4g}".format(
+            automl.best_loss
+        )
+    )
+    print(
+        "pred_time of the best config on validation data: {0:.4g}".format(
+            automl.metrics_for_best_config[1]["pred_time"]
+        )
+    )
+    print(
+        "val_train_loss_gap of the best config on validation data: {0:.4g}".format(
+            automl.metrics_for_best_config[1]["val_train_loss_gap"]
+        )
+    )
+
     config = automl.best_config.copy()
     config["learner"] = automl.best_estimator
     automl.trainable(config)
@@ -1,3 +1,4 @@
+import sys
 from openml.exceptions import OpenMLServerException
 from requests.exceptions import ChunkedEncodingError
 
@@ -6,6 +7,14 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     from flaml.data import load_openml_dataset
     import urllib3
 
+    performance_check_budget = 240
+    if (
+        sys.platform == "darwin"
+        and budget < performance_check_budget
+        and dataset_format == "dataframe"
+        and "3.9" in sys.version
+    ):
+        budget = performance_check_budget  # revise the budget on macOS
     try:
         X_train, X_test, y_train, y_test = load_openml_dataset(
             dataset_id=1169, data_dir="test/", dataset_format=dataset_format
@@ -53,11 +62,14 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     """ compute different metric values on testing dataset """
     from flaml.ml import sklearn_metric_loss_score
 
-    print("accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test))
+    accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
+    print("accuracy", "=", accuracy)
     print(
         "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
     )
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
+    if budget >= performance_check_budget:
+        assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
     from flaml.data import get_output_from_log
 
     (
@@ -128,4 +140,4 @@ def test_mlflow():
 
 
 if __name__ == "__main__":
-    test_automl(120)
+    test_automl(240)
@@ -75,9 +75,8 @@ def _test_flaml_raytune_consistency(
     )
     flaml_best_config = analysis.best_config
     flaml_config_in_results = [v["config"] for v in analysis.results.values()]
+    flaml_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
     print(analysis.best_trial.last_result)  # the best trial's result
-    print("best flaml", searcher_name, flaml_best_config)  # the best config
-    print("flaml config in results", searcher_name, flaml_config_in_results)
 
     np.random.seed(100)
     searcher = setup_searcher(searcher_name)
@@ -97,8 +96,16 @@ def _test_flaml_raytune_consistency(
     )
     ray_best_config = analysis.best_config
     ray_config_in_results = [v["config"] for v in analysis.results.values()]
+    ray_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
+
     print(analysis.best_trial.last_result)  # the best trial's result
-    print("ray best", searcher_name, analysis.best_config)  # the best config
+    print("time_total_s in flaml", flaml_time_in_results)  # the best trial's result
+    print("time_total_s in ray", ray_time_in_results)  # the best trial's result
+
+    print("best flaml", searcher_name, flaml_best_config)  # the best config
+    print("ray best", searcher_name, ray_best_config)  # the best config
+
+    print("flaml config in results", searcher_name, flaml_config_in_results)
     print("ray config in results", searcher_name, ray_config_in_results)
     assert ray_best_config == flaml_best_config, "best config should be the same"
     assert (
@@ -59,4 +59,4 @@ print(automl.model.estimator)
 
 ### A more advanced example including custom learner and metric
 
-[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/flaml_automl.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/flaml_automl.ipynb)
+[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_classification.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_classification.ipynb)
@@ -267,6 +267,21 @@ For example,
 ```python
 automl.fit(X_train, y_train, max_iter=100, train_time_limit=1, pred_time_limit=1e-3)
 ```
+4. Constraints on the metrics of the ML model tried in AutoML.
+
+When users provide a [custom metric function](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric), which returns a primary optimization metric and a dictionary of additional metrics (typically also about the model) to log, users can also specify constraints on one or more of the metrics in the dictionary of additional metrics.
+
+Users need to provide a list of such constraints in the following format:
+Each element in this list is a 3-tuple, which shall be expressed
+in the following format: the first element of the 3-tuple is the name of the
+metric, the second element is the inequality sign chosen from ">=" and "<=",
+and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
+
+For example,
+```python
+metric_constraints = [("train_loss", "<=", 0.1), ("val_loss", "<=", 0.1)]
+automl.fit(X_train, y_train, max_iter=100, train_time_limit=1, metric_constraints=metric_constraints)
+```
+
 ### Ensemble
 