mirror of https://github.com/microsoft/autogen.git
logging (#371)
* query logged runs
* mlflow log when using ray
* key check for newer version of ray #363
* catch ImportError
* log and load AutoML model
* retrain if necessary when ensemble fails
This commit is contained in:
parent
9c00e4272a
commit
8602def1c4
|
@ -698,6 +698,10 @@ class AutoML(BaseEstimator):
|
|||
return attr.classes_.tolist()
|
||||
return None
|
||||
|
||||
@property
def n_features_in_(self):
    """Return the ``n_features_in_`` value reported by the trained estimator."""
    trained = self._trained_estimator
    return trained.n_features_in_
|
||||
|
||||
@property
|
||||
def time_to_find_best_model(self) -> float:
|
||||
"""Time taken to find best model in seconds."""
|
||||
|
@ -2160,7 +2164,6 @@ class AutoML(BaseEstimator):
|
|||
del self._state.y_train, self._state.y_train_all, self._state.y_val
|
||||
del self._sample_weight_full, self._state.fit_kwargs
|
||||
del self._state.groups, self._state.groups_all, self._state.groups_val
|
||||
# if verbose == 0:
|
||||
logger.setLevel(old_level)
|
||||
|
||||
def _search_parallel(self):
|
||||
|
@ -2247,7 +2250,7 @@ class AutoML(BaseEstimator):
|
|||
trial
|
||||
for trial in analysis.trials
|
||||
if trial.last_result
|
||||
and trial.last_result["wall_clock_time"] is not None
|
||||
and trial.last_result.get("wall_clock_time") is not None
|
||||
),
|
||||
key=lambda x: x.last_result["wall_clock_time"],
|
||||
)
|
||||
|
@ -2259,8 +2262,9 @@ class AutoML(BaseEstimator):
|
|||
estimator = config.get("ml", config)["learner"]
|
||||
search_state = self._search_states[estimator]
|
||||
search_state.update(result, 0)
|
||||
if result["wall_clock_time"] is not None:
|
||||
self._state.time_from_start = result["wall_clock_time"]
|
||||
wall_time = result.get("wall_clock_time")
|
||||
if wall_time is not None:
|
||||
self._state.time_from_start = wall_time
|
||||
if search_state.sample_size == self._state.data_size[0]:
|
||||
self._iter_per_learner[estimator] += 1
|
||||
if not self._fullsize_reached:
|
||||
|
@ -2278,17 +2282,34 @@ class AutoML(BaseEstimator):
|
|||
self._time_taken_best_iter = self._state.time_from_start
|
||||
better = True
|
||||
self._search_states[estimator].best_config = config
|
||||
if (better or self._log_type == "all") and self._training_log:
|
||||
self._training_log.append(
|
||||
self._iter_per_learner[estimator],
|
||||
search_state.metric_for_logging,
|
||||
search_state.trial_time,
|
||||
self._state.time_from_start,
|
||||
search_state.val_loss,
|
||||
config,
|
||||
estimator,
|
||||
search_state.sample_size,
|
||||
)
|
||||
if better or self._log_type == "all":
|
||||
self._log_trial(search_state, estimator)
|
||||
|
||||
def _log_trial(self, search_state, estimator):
    """Record a trial in the training log and, when a run is active, in mlflow.

    Args:
        search_state: State object of the learner being logged; its
            ``metric_for_logging``, ``trial_time``, ``val_loss``, ``config``,
            ``sample_size``, ``best_loss`` and ``best_config`` attributes
            are read.
        estimator: Key of the learner in ``self._iter_per_learner``; also
            logged as the ``learner`` parameter.
    """
    training_log = self._training_log
    if training_log:
        record = (
            self._iter_per_learner[estimator],
            search_state.metric_for_logging,
            search_state.trial_time,
            self._state.time_from_start,
            search_state.val_loss,
            search_state.config,
            estimator,
            search_state.sample_size,
        )
        training_log.append(*record)

    # `mlflow` may be None (presumably when the optional import failed —
    # confirm at module level); nothing is sent unless a run is active.
    if mlflow is None or not mlflow.active_run():
        return

    # Each trial gets its own nested child run under the active run.
    with mlflow.start_run(nested=True):
        log_metric = mlflow.log_metric
        log_param = mlflow.log_param
        log_metric("iter_counter", self._iter_per_learner[estimator])
        log_param("metric_for_logging", search_state.metric_for_logging)
        log_metric("trial_time", search_state.trial_time)
        log_metric("wall_clock_time", self._state.time_from_start)
        log_metric("validation_loss", search_state.val_loss)
        log_param("config", search_state.config)
        log_param("learner", estimator)
        log_param("sample_size", search_state.sample_size)
        log_metric("best_validation_loss", search_state.best_loss)
        log_param("best_config", search_state.best_config)
        log_param("best_learner", self._best_estimator)
|
||||
|
||||
def _search_sequential(self):
|
||||
try:
|
||||
|
@ -2461,8 +2482,9 @@ class AutoML(BaseEstimator):
|
|||
f"Estimated sufficient time budget={max_budget:.0f}s."
|
||||
f" Estimated necessary time budget={min_budget:.0f}s."
|
||||
)
|
||||
if result["wall_clock_time"] is not None:
|
||||
self._state.time_from_start = result["wall_clock_time"]
|
||||
wall_time = result.get("wall_clock_time")
|
||||
if wall_time is not None:
|
||||
self._state.time_from_start = wall_time
|
||||
# logger.info(f"{self._search_states[estimator].sample_size}, {data_size}")
|
||||
if search_state.sample_size == self._state.data_size[0]:
|
||||
self._iter_per_learner[estimator] += 1
|
||||
|
@ -2500,38 +2522,8 @@ class AutoML(BaseEstimator):
|
|||
):
|
||||
search_state.trained_estimator.cleanup()
|
||||
if better or self._log_type == "all":
|
||||
if self._training_log:
|
||||
self._training_log.append(
|
||||
self._iter_per_learner[estimator],
|
||||
search_state.metric_for_logging,
|
||||
search_state.trial_time,
|
||||
self._state.time_from_start,
|
||||
search_state.val_loss,
|
||||
search_state.config,
|
||||
estimator,
|
||||
search_state.sample_size,
|
||||
)
|
||||
if mlflow is not None and mlflow.active_run():
|
||||
with mlflow.start_run(nested=True):
|
||||
mlflow.log_metric(
|
||||
"iter_counter", self._iter_per_learner[estimator]
|
||||
)
|
||||
mlflow.log_param(
|
||||
"metric_for_logging", search_state.metric_for_logging
|
||||
)
|
||||
mlflow.log_metric("trial_time", search_state.trial_time)
|
||||
mlflow.log_metric(
|
||||
"wall_clock_time", self._state.time_from_start
|
||||
)
|
||||
mlflow.log_metric("validation_loss", search_state.val_loss)
|
||||
mlflow.log_param("config", search_state.config)
|
||||
mlflow.log_param("learner", estimator)
|
||||
mlflow.log_param("sample_size", search_state.sample_size)
|
||||
mlflow.log_metric(
|
||||
"best_validation_loss", search_state.best_loss
|
||||
)
|
||||
mlflow.log_param("best_config", search_state.best_config)
|
||||
mlflow.log_param("best_learner", self._best_estimator)
|
||||
self._log_trial(search_state, estimator)
|
||||
|
||||
logger.info(
|
||||
" at {:.1f}s,\testimator {}'s best error={:.4f},\tbest estimator {}'s best error={:.4f}".format(
|
||||
self._state.time_from_start,
|
||||
|
@ -2640,6 +2632,7 @@ class AutoML(BaseEstimator):
|
|||
)
|
||||
if self._trained_estimator:
|
||||
logger.info(f"selected model: {self._trained_estimator.model}")
|
||||
estimators = []
|
||||
if self._ensemble and self._state.task in (
|
||||
"binary",
|
||||
"multi",
|
||||
|
@ -2673,8 +2666,7 @@ class AutoML(BaseEstimator):
|
|||
if x[1].best_loss < 4 * self._selected.best_loss
|
||||
]
|
||||
logger.info(estimators)
|
||||
if len(estimators) <= 1:
|
||||
return
|
||||
if len(estimators) > 1:
|
||||
if self._state.task in CLASSIFICATION:
|
||||
from sklearn.ensemble import StackingClassifier as Stacker
|
||||
else:
|
||||
|
@ -2732,6 +2724,7 @@ class AutoML(BaseEstimator):
|
|||
if (
|
||||
self._state.task == TS_FORECAST
|
||||
or self._trained_estimator is None
|
||||
or self._trained_estimator.model is None
|
||||
or (
|
||||
self._state.time_budget - self._state.time_from_start
|
||||
> self._selected.est_retrain_time(self.data_size_full)
|
||||
|
@ -2758,8 +2751,6 @@ class AutoML(BaseEstimator):
|
|||
logger.info(f"retrained model: {self._trained_estimator.model}")
|
||||
else:
|
||||
logger.info("not retraining because the time budget is too small.")
|
||||
if self.model and mlflow is not None and mlflow.active_run():
|
||||
mlflow.sklearn.log_model(self.model, "best_model")
|
||||
|
||||
def __del__(self):
|
||||
if (
|
||||
|
|
|
@ -116,7 +116,7 @@ class BaseEstimator:
|
|||
|
||||
@property
|
||||
def n_features_in_(self):
|
||||
return self.model.n_features_in_
|
||||
return self._model.n_features_in_
|
||||
|
||||
@property
|
||||
def model(self):
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = "0.9.2"
|
||||
__version__ = "0.9.3"
|
||||
|
|
|
@ -2,6 +2,11 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved. \n",
|
||||
"\n",
|
||||
|
@ -22,105 +27,106 @@
|
|||
"\n",
|
||||
"In this notebook, we use one real data example (binary classification) to showcase how to use FLAML library together with AzureML.\n",
|
||||
"\n",
|
||||
"FLAML requires `Python>=3.6`. To run this notebook example, please install flaml with the `notebook` and `azureml` option:\n",
|
||||
"FLAML requires `Python>=3.6`. To run this notebook example, please install flaml with the [azureml] option:\n",
|
||||
"```bash\n",
|
||||
"pip install flaml[notebook,azureml]\n",
|
||||
"pip install flaml[azureml]\n",
|
||||
"```"
|
||||
],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"!pip install flaml[notebook,azureml]"
|
||||
],
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
"source": [
|
||||
"!pip install flaml[azureml]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Enable mlflow in AzureML workspace"
|
||||
],
|
||||
"metadata": {}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import mlflow\n",
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## 2. Classification Example\n",
|
||||
"### Load data and preprocess\n",
|
||||
"\n",
|
||||
"Download [Airlines dataset](https://www.openml.org/d/1169) from OpenML. The task is to predict whether a given flight will be delayed, given the information of the scheduled departure."
|
||||
],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"from flaml.data import load_openml_dataset\n",
|
||||
"X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "subslide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from flaml.data import load_openml_dataset\n",
|
||||
"X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Run FLAML\n",
|
||||
"In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. For example, the default ML learners of FLAML are `['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree', 'lrl1']`. "
|
||||
],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"### Run FLAML\n",
|
||||
"In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. For example, the default ML learners of FLAML are `['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree', 'lrl1']`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"''' import AutoML class from flaml package '''\n",
|
||||
"from flaml import AutoML\n",
|
||||
"automl = AutoML()"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"settings = {\n",
|
||||
" \"time_budget\": 60, # total running time in seconds\n",
|
||||
|
@ -131,181 +137,77 @@
|
|||
" \"sample\": False, # whether to subsample training data\n",
|
||||
" \"log_file_name\": 'airlines_experiment.log', # flaml log file\n",
|
||||
"}"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mlflow.set_experiment(\"flaml\")\n",
|
||||
"experiment = mlflow.set_experiment(\"flaml\")\n",
|
||||
"with mlflow.start_run() as run:\n",
|
||||
" '''The main flaml automl API'''\n",
|
||||
" automl.fit(X_train=X_train, y_train=y_train, **settings)"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
" automl.fit(X_train=X_train, y_train=y_train, **settings)\n",
|
||||
" # log the model\n",
|
||||
" mlflow.sklearn.log_model(automl, \"automl\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Best model and metric"
|
||||
],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
"### Load the model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"''' retrieve best config and best learner'''\n",
|
||||
"print('Best ML leaner:', automl.best_estimator)\n",
|
||||
"print('Best hyperparmeter config:', automl.best_config)\n",
|
||||
"print('Best accuracy on validation data: {0:.4g}'.format(1 - automl.best_loss))\n",
|
||||
"print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))"
|
||||
],
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"automl.model"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"''' pickle and save the automl object '''\n",
|
||||
"import pickle\n",
|
||||
"with open('automl.pkl', 'wb') as f:\n",
|
||||
" pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"''' compute predictions of testing dataset ''' \n",
|
||||
"y_pred = automl.predict(X_test)\n",
|
||||
"print('Predicted labels', y_pred)\n",
|
||||
"print('True labels', y_test)\n",
|
||||
"y_pred_proba = automl.predict_proba(X_test)[:,1]"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"''' compute different metric values on testing dataset'''\n",
|
||||
"from flaml.ml import sklearn_metric_loss_score\n",
|
||||
"print('accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))\n",
|
||||
"print('roc_auc', '=', 1 - sklearn_metric_loss_score('roc_auc', y_pred_proba, y_test))\n",
|
||||
"print('log_loss', '=', sklearn_metric_loss_score('log_loss', y_pred_proba, y_test))"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
"automl = mlflow.sklearn.load_model(f\"{run.info.artifact_uri}/automl\")\n",
|
||||
"print(automl.predict_proba(X_test))\n",
|
||||
"print(automl.predict(X_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Log history"
|
||||
],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"### Retrieve logs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"from flaml.data import get_output_from_log\n",
|
||||
"time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n",
|
||||
" get_output_from_log(filename = settings['log_file_name'], time_budget = 60)\n",
|
||||
"\n",
|
||||
"for config in config_history:\n",
|
||||
" print(config)"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "subslide"
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"plt.title('Learning Curve')\n",
|
||||
"plt.xlabel('Wall Clock Time (s)')\n",
|
||||
"plt.ylabel('Validation Accuracy')\n",
|
||||
"plt.scatter(time_history, 1 - np.array(valid_loss_history))\n",
|
||||
"plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n",
|
||||
"plt.show()"
|
||||
],
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
}
|
||||
"source": [
|
||||
"mlflow.search_runs(experiment_ids=[experiment.experiment_id], filter_string=\"params.learner = 'xgboost'\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544"
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.8.0 64-bit ('blend': conda)"
|
||||
"display_name": "Python 3.8.0 64-bit ('blend': conda)",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
@ -317,12 +219,9 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.0"
|
||||
},
|
||||
"interpreter": {
|
||||
"hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544"
|
||||
"version": "3.9.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,6 +99,7 @@ class TestClassification(unittest.TestCase):
|
|||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
assert automl.model is not None
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
|
|
|
@ -101,13 +101,25 @@ def test_mlflow():
|
|||
"log_file_name": "adult.log", # flaml log file
|
||||
}
|
||||
mlflow.set_experiment("flaml")
|
||||
with mlflow.start_run():
|
||||
"""The main flaml automl API"""
|
||||
with mlflow.start_run() as run:
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
|
||||
mlflow.sklearn.log_model(automl, "automl")
|
||||
loaded_model = mlflow.pyfunc.load_model(f"{run.info.artifact_uri}/automl")
|
||||
print(loaded_model.predict(X_test))
|
||||
automl._mem_thres = 0
|
||||
print(automl.trainable(automl.points_to_evaluate[0]))
|
||||
|
||||
settings["use_ray"] = True
|
||||
try:
|
||||
with mlflow.start_run() as run:
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
mlflow.sklearn.log_model(automl, "automl")
|
||||
automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
|
||||
print(automl.predict_proba(X_test))
|
||||
except ImportError:
|
||||
pass
|
||||
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_automl(120)
|
||||
|
|
|
@ -28,12 +28,11 @@ mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
|
|||
|
||||
```python
|
||||
from flaml.data import load_openml_dataset
|
||||
from flaml import AutoML
|
||||
|
||||
# Download [Airlines dataset](https://www.openml.org/d/1169) from OpenML. The task is to predict whether a given flight will be delayed, given the information of the scheduled departure.
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
|
||||
from flaml import AutoML
|
||||
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 60, # total running time in seconds
|
||||
|
@ -41,12 +40,23 @@ settings = {
|
|||
"task": "classification", # task type
|
||||
"log_file_name": "airlines_experiment.log", # flaml log file
|
||||
}
|
||||
mlflow.set_experiment("flaml") # the experiment name in AzureML workspace
|
||||
experiment = mlflow.set_experiment("flaml") # the experiment name in AzureML workspace
|
||||
with mlflow.start_run() as run: # create a mlflow run
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
mlflow.sklearn.log_model(automl, "automl")
|
||||
```
|
||||
|
||||
The metrics in the run will be automatically logged in an experiment named "flaml" in your AzureML workspace.
|
||||
The metrics in the run will be automatically logged in an experiment named "flaml" in your AzureML workspace. They can be retrieved by `mlflow.search_runs`:
|
||||
|
||||
```python
|
||||
mlflow.search_runs(experiment_ids=[experiment.experiment_id], filter_string="params.learner = 'xgboost'")
|
||||
```
|
||||
|
||||
The logged model can be loaded and used to make predictions:
|
||||
```python
|
||||
automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
|
||||
print(automl.predict(X_test))
|
||||
```
|
||||
|
||||
[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_azureml.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_azureml.ipynb)
|
||||
|
||||
|
|
Loading…
Reference in New Issue