Merge branch 'main' into dependabot/github_actions/actions/setup-python-4

zsk 2022-08-24 13:51:58 -04:00 committed by GitHub
commit c224694f17
11 changed files with 7607 additions and 1978 deletions

View File

@@ -1,6 +0,0 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"

View File

@@ -20,7 +20,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v2
- name: Cache conda
uses: actions/cache@v1
uses: actions/cache@v3
with:
path: ~/conda_pkgs_dir
key: conda-${{ matrix.os }}-python-${{ matrix.python-version }}-${{ hashFiles('environment.yml') }}

View File

@@ -16,7 +16,7 @@ jobs:
working-directory: website
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v2
- uses: actions/setup-node@v3
with:
node-version: 14.x
# cache: yarn
@@ -52,7 +52,7 @@ jobs:
working-directory: website
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v2
- uses: actions/setup-node@v3
with:
node-version: 14.x
# cache: yarn

View File

@@ -94,7 +94,7 @@ You can find a detailed documentation about FLAML [here](https://microsoft.githu
In addition, you can find:
- Demo and tutorials of FLAML [here](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A).
- [Talks](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A) and [tutorials](https://github.com/microsoft/FLAML/tree/tutorial/tutorial) about FLAML.
- Research around FLAML [here](https://microsoft.github.io/FLAML/docs/Research).

View File

@@ -366,6 +366,7 @@ class AutoMLState:
state.best_loss,
state.n_jobs,
state.learner_classes.get(estimator),
state.cv_score_agg_func,
state.log_training_metric,
this_estimator_kwargs,
)
@@ -734,6 +735,7 @@ class AutoML(BaseEstimator):
settings["min_sample_size"] = settings.get("min_sample_size", MIN_SAMPLE_TRAIN)
settings["use_ray"] = settings.get("use_ray", False)
settings["metric_constraints"] = settings.get("metric_constraints", [])
settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None)
settings["fit_kwargs_by_estimator"] = settings.get(
"fit_kwargs_by_estimator", {}
)
@@ -2144,6 +2146,7 @@ class AutoML(BaseEstimator):
use_ray=None,
metric_constraints=None,
custom_hp=None,
cv_score_agg_func=None,
skip_transform=None,
fit_kwargs_by_estimator=None,
**fit_kwargs,
@@ -2366,6 +2369,38 @@ class AutoML(BaseEstimator):
}
```
cv_score_agg_func: customized cross-validation scores aggregate function. Defaults to averaging metrics across folds. If specified, this function needs to have the following signature:
```python
def cv_score_agg_func(val_loss_folds, log_metrics_folds):
    return metric_to_minimize, metrics_to_log
```
val_loss_folds - list of floats, the loss scores of each fold; log_metrics_folds - list of dicts/floats, the metrics of each fold to log.
This function should return the final aggregate result over all folds: a float for the minimization objective, and a dictionary of metrics to log (or None).
E.g.,
```python
def cv_score_agg_func(val_loss_folds, log_metrics_folds):
    metric_to_minimize = sum(val_loss_folds) / len(val_loss_folds)
    metrics_to_log = None
    for single_fold in log_metrics_folds:
        if metrics_to_log is None:
            metrics_to_log = single_fold
        elif isinstance(metrics_to_log, dict):
            metrics_to_log = {k: metrics_to_log[k] + v for k, v in single_fold.items()}
        else:
            metrics_to_log += single_fold
    if metrics_to_log:
        n = len(val_loss_folds)
        metrics_to_log = (
            {k: v / n for k, v in metrics_to_log.items()}
            if isinstance(metrics_to_log, dict)
            else metrics_to_log / n
        )
    return metric_to_minimize, metrics_to_log
```
fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
For TransformersEstimator, available fit_kwargs can be found from
[TrainingArgumentsForAuto](nlp/huggingface/training_args).
e.g.,
skip_transform: boolean, default=False | Whether to pre-process data prior to modeling.
fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
For TransformersEstimator, available fit_kwargs can be found from
@@ -2568,6 +2603,9 @@ class AutoML(BaseEstimator):
eval_method = self._decide_eval_method(eval_method, time_budget)
self._state.eval_method = eval_method
logger.info("Evaluation method: {}".format(eval_method))
self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get(
"cv_score_agg_func"
)
self._retrain_in_budget = retrain_full == "budget" and (
eval_method == "holdout" and self._state.X_val is None
@@ -3069,7 +3107,9 @@ class AutoML(BaseEstimator):
if mlflow is not None and mlflow.active_run():
with mlflow.start_run(nested=True):
mlflow.log_metric("iter_counter", self._track_iter)
if "intermediate_results" in search_state.metric_for_logging:
if (search_state.metric_for_logging is not None) and (
"intermediate_results" in search_state.metric_for_logging
):
for each_entry in search_state.metric_for_logging[
"intermediate_results"
]:
@@ -3079,7 +3119,8 @@ class AutoML(BaseEstimator):
"iter_counter", self._iter_per_learner[estimator]
)
del search_state.metric_for_logging["intermediate_results"]
mlflow.log_metrics(search_state.metric_for_logging)
if search_state.metric_for_logging:
mlflow.log_metrics(search_state.metric_for_logging)
mlflow.log_metric("trial_time", search_state.trial_time)
mlflow.log_metric("wall_clock_time", self._state.time_from_start)
mlflow.log_metric("validation_loss", search_state.val_loss)

View File

@@ -431,6 +431,26 @@ def get_val_loss(
return val_loss, metric_for_logging, train_time, pred_time
def default_cv_score_agg_func(val_loss_folds, log_metrics_folds):
metric_to_minimize = sum(val_loss_folds) / len(val_loss_folds)
metrics_to_log = None
for single_fold in log_metrics_folds:
if metrics_to_log is None:
metrics_to_log = single_fold
elif isinstance(metrics_to_log, dict):
metrics_to_log = {k: metrics_to_log[k] + v for k, v in single_fold.items()}
else:
metrics_to_log += single_fold
if metrics_to_log:
n = len(val_loss_folds)
metrics_to_log = (
{k: v / n for k, v in metrics_to_log.items()}
if isinstance(metrics_to_log, dict)
else metrics_to_log / n
)
return metric_to_minimize, metrics_to_log
def evaluate_model_CV(
config,
estimator,
@@ -441,15 +461,18 @@ def evaluate_model_CV(
task,
eval_metric,
best_val_loss,
cv_score_agg_func=None,
log_training_metric=False,
fit_kwargs={},
):
if cv_score_agg_func is None:
cv_score_agg_func = default_cv_score_agg_func
start_time = time.time()
total_val_loss = 0
total_metric = None
val_loss_folds = []
log_metric_folds = []
metric = None
train_time = pred_time = 0
valid_fold_num = total_fold_num = 0
total_fold_num = 0
n = kf.get_n_splits()
X_train_split, y_train_split = X_train_all, y_train_all
if task in CLASSIFICATION:
@@ -471,7 +494,6 @@ def evaluate_model_CV(
else:
kf = kf.split(X_train_split)
rng = np.random.RandomState(2020)
val_loss_list = []
budget_per_train = budget / n
if "sample_weight" in fit_kwargs:
weight = fit_kwargs["sample_weight"]
@@ -514,33 +536,19 @@ def evaluate_model_CV(
log_training_metric=log_training_metric,
fit_kwargs=fit_kwargs,
)
if isinstance(metric_i, dict) and "intermediate_results" in metric_i.keys():
del metric_i["intermediate_results"]
if weight is not None:
fit_kwargs["sample_weight"] = weight
valid_fold_num += 1
total_fold_num += 1
total_val_loss += val_loss_i
if log_training_metric or not isinstance(eval_metric, str):
if isinstance(total_metric, dict):
total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
elif total_metric is not None:
total_metric += metric_i
else:
total_metric = metric_i
val_loss_folds.append(val_loss_i)
log_metric_folds.append(metric_i)
train_time += train_time_i
pred_time += pred_time_i
if valid_fold_num == n:
val_loss_list.append(total_val_loss / valid_fold_num)
total_val_loss = valid_fold_num = 0
elif time.time() - start_time >= budget:
val_loss_list.append(total_val_loss / valid_fold_num)
if time.time() - start_time >= budget:
break
val_loss = np.max(val_loss_list)
val_loss, metric = cv_score_agg_func(val_loss_folds, log_metric_folds)
n = total_fold_num
if log_training_metric or not isinstance(eval_metric, str):
if isinstance(total_metric, dict):
metric = {k: v / n for k, v in total_metric.items()}
else:
metric = total_metric / n
pred_time /= n
return val_loss, metric, train_time, pred_time
@@ -562,6 +570,7 @@ def compute_estimator(
best_val_loss=np.Inf,
n_jobs=1,
estimator_class=None,
cv_score_agg_func=None,
log_training_metric=False,
fit_kwargs={},
):
@@ -608,6 +617,7 @@ def compute_estimator(
task,
eval_metric,
best_val_loss,
cv_score_agg_func,
log_training_metric=log_training_metric,
fit_kwargs=fit_kwargs,
)
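For readers skimming the new `default_cv_score_agg_func`, a small hedged sanity check of its behavior (assuming it is importable from `flaml.ml`, the module this diff appears to modify):

```python
from flaml.ml import default_cv_score_agg_func

val_loss_folds = [0.1, 0.2, 0.3]
log_metrics_folds = [{"accuracy": 0.9}, {"accuracy": 0.8}, {"accuracy": 0.7}]

loss, metrics = default_cv_score_agg_func(val_loss_folds, log_metrics_folds)
# loss is the mean fold loss (~0.2); metrics averages each logged key across
# folds (~{"accuracy": 0.8}), up to floating-point rounding.
print(loss, metrics)
```

This mirrors the refactor in `evaluate_model_CV`: per-fold losses and metrics are now collected into lists, and aggregation is delegated to `cv_score_agg_func`, with this function as the default when none is supplied.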

View File

@@ -1 +1 @@
__version__ = "1.0.11"
__version__ = "1.0.12"

File diff suppressed because one or more lines are too long

View File

@@ -154,6 +154,19 @@ def test_mlflow():
pass
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
from sklearn.datasets import load_iris
with mlflow.start_run():
automl = AutoML()
automl_settings = {
"time_budget": 2, # in seconds
"metric": "accuracy",
"task": "classification",
"log_file_name": "iris.log",
}
X_train, y_train = load_iris(return_X_y=True)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
if __name__ == "__main__":
test_automl(600)
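The new test above runs `AutoML.fit` inside an active MLflow run, exercising the logging path guarded earlier in this commit. As a hedged follow-up sketch (not part of the commit), the metrics FLAML logs there, such as `validation_loss` and `trial_time` from the `mlflow.log_metric` calls shown above, could be inspected afterwards with standard MLflow APIs:

```python
import mlflow

# Query the tracking store and peek at the logged metrics of recent runs.
runs = mlflow.search_runs()  # returns a pandas DataFrame, one row per run
print(runs.filter(like="metrics.", axis=1).head())
```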

View File

@@ -90,7 +90,7 @@ Then, you can use it just like you use the original `LGBMClassifier`. Your other
* Understand the use cases for [Task-oriented AutoML](Use-Cases/task-oriented-automl), [Tune user-defined function](Use-Cases/Tune-User-Defined-Function) and [Zero-shot AutoML](Use-Cases/Zero-Shot-AutoML).
* Find code examples under "Examples": from [AutoML - Classification](Examples/AutoML-Classification) to [Tune - PyTorch](Examples/Tune-PyTorch).
* Watch [video tutorials](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A).
* Find [talks](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A) and [tutorials](https://github.com/microsoft/FLAML/tree/tutorial/tutorial) about FLAML.
* Learn about [research](Research) around FLAML.
* Refer to [SDK](reference/automl) and [FAQ](FAQ).

View File

@@ -20,4 +20,4 @@ For technical details, please check our research publications.
* [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021).
* [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).
Many researchers and engineers have contributed to the technology development. In alphabetical order: Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Kevin Chen, Yi Wei Chen, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Moe Kayali, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Susan Xueqing Liu, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Chi Wang, Yue Wang, Markus Weimer, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu.
Many researchers and engineers have contributed to the technology development. In alphabetical order: Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Kevin Chen, Yi Wei Chen, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Moe Kayali, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Susan Xueqing Liu, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Chi Wang, Yue Wang, Markus Weimer, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu, Rui Zhuang.