mirror of https://github.com/microsoft/autogen.git
Support parallel and add random search (#167)
* non hashable value out of signature
* parallel trials
* add random in _search_parallel
* fix bug in retraining
* check memory constraint before training
* retrain_full
* log custom metric
* retraining budget check
* sample size check before retrain
* remove 'time2eval' from result
* report 'total_search_time' in result
* rename total_search_time to wall_clock_time
* rename train_loss boolean to log_training_metric
* set default train_loss to None
* exclude oom result
* log retrained model
* no subsample
* doc str
* notebook
* predicted value is NaN for sarimax
* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
This commit is contained in:
parent
3d0a3d26a2
commit
a229a6112a
3466
flaml/automl.py
3466
flaml/automl.py
File diff suppressed because it is too large
Load Diff
|
@ -141,14 +141,14 @@ def get_output_from_log(filename, time_budget):
|
|||
best_config_list = []
|
||||
with training_log_reader(filename) as reader:
|
||||
for record in reader.records():
|
||||
time_used = record.total_search_time
|
||||
time_used = record.wall_clock_time
|
||||
val_loss = record.validation_loss
|
||||
config = record.config
|
||||
learner = record.learner.split('_')[0]
|
||||
sample_size = record.sample_size
|
||||
train_loss = record.logged_metric
|
||||
|
||||
if time_used < time_budget:
|
||||
if time_used < time_budget and np.isfinite(val_loss):
|
||||
if val_loss < best_val_loss:
|
||||
best_val_loss = val_loss
|
||||
best_config = config
|
||||
|
|
48
flaml/ml.py
48
flaml/ml.py
|
@ -102,8 +102,11 @@ def sklearn_metric_loss_score(
|
|||
score = log_loss(
|
||||
y_true, y_predict, labels=labels, sample_weight=sample_weight)
|
||||
elif 'mape' in metric_name:
|
||||
score = mean_absolute_percentage_error(
|
||||
y_true, y_predict)
|
||||
try:
|
||||
score = mean_absolute_percentage_error(
|
||||
y_true, y_predict)
|
||||
except ValueError:
|
||||
return np.inf
|
||||
elif 'micro_f1' in metric_name:
|
||||
score = 1 - f1_score(
|
||||
y_true, y_predict, sample_weight=sample_weight, average='micro')
|
||||
|
@ -141,21 +144,23 @@ def get_y_pred(estimator, X, eval_metric, obj, freq=None):
|
|||
|
||||
def get_test_loss(
|
||||
estimator, X_train, y_train, X_test, y_test, weight_test,
|
||||
eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}
|
||||
eval_metric, obj, labels=None, budget=None, log_training_metric=False, fit_kwargs={}
|
||||
):
|
||||
start = time.time()
|
||||
train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
|
||||
estimator.fit(X_train, y_train, budget, **fit_kwargs)
|
||||
if isinstance(eval_metric, str):
|
||||
pred_start = time.time()
|
||||
test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
|
||||
pred_time = (time.time() - pred_start) / X_test.shape[0]
|
||||
test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
|
||||
labels, weight_test)
|
||||
if train_loss is not False:
|
||||
if log_training_metric:
|
||||
test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
|
||||
train_loss = sklearn_metric_loss_score(
|
||||
eval_metric, test_pred_y,
|
||||
y_train, labels, fit_kwargs.get('sample_weight'))
|
||||
else:
|
||||
train_loss = None
|
||||
else: # customized metric function
|
||||
test_loss, metrics = eval_metric(
|
||||
X_test, y_test, estimator, labels, X_train, y_train,
|
||||
|
@ -174,40 +179,41 @@ def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
|
|||
|
||||
def evaluate_model(
|
||||
estimator, X_train, y_train, X_val, y_val, weight_val,
|
||||
budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
|
||||
budget, kf, task, eval_method, eval_metric, best_val_loss, log_training_metric=False,
|
||||
fit_kwargs={}
|
||||
):
|
||||
if 'holdout' in eval_method:
|
||||
val_loss, train_loss, train_time, pred_time = evaluate_model_holdout(
|
||||
estimator, X_train, y_train, X_val, y_val, weight_val, budget,
|
||||
task, eval_metric, train_loss=train_loss,
|
||||
task, eval_metric, log_training_metric=log_training_metric,
|
||||
fit_kwargs=fit_kwargs)
|
||||
else:
|
||||
val_loss, train_loss, train_time, pred_time = evaluate_model_CV(
|
||||
estimator, X_train, y_train, budget, kf, task,
|
||||
eval_metric, best_val_loss, train_loss=train_loss,
|
||||
eval_metric, best_val_loss, log_training_metric=log_training_metric,
|
||||
fit_kwargs=fit_kwargs)
|
||||
return val_loss, train_loss, train_time, pred_time
|
||||
|
||||
|
||||
def evaluate_model_holdout(
|
||||
estimator, X_train, y_train, X_val, y_val,
|
||||
weight_val, budget, task, eval_metric, train_loss=False,
|
||||
weight_val, budget, task, eval_metric, log_training_metric=False,
|
||||
fit_kwargs={}
|
||||
):
|
||||
val_loss, train_time, train_loss, pred_time = get_test_loss(
|
||||
estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
|
||||
task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
|
||||
task, budget=budget, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
|
||||
return val_loss, train_loss, train_time, pred_time
|
||||
|
||||
|
||||
def evaluate_model_CV(
|
||||
estimator, X_train_all, y_train_all, budget, kf,
|
||||
task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
|
||||
task, eval_metric, best_val_loss, log_training_metric=False, fit_kwargs={}
|
||||
):
|
||||
start_time = time.time()
|
||||
total_val_loss = 0
|
||||
total_train_loss = None
|
||||
train_loss = None
|
||||
train_time = pred_time = 0
|
||||
valid_fold_num = total_fold_num = 0
|
||||
n = kf.get_n_splits()
|
||||
|
@ -231,7 +237,7 @@ def evaluate_model_CV(
|
|||
kf = kf.split(X_train_split)
|
||||
rng = np.random.RandomState(2020)
|
||||
val_loss_list = []
|
||||
budget_per_train = budget / (n + 1)
|
||||
budget_per_train = budget / n
|
||||
if 'sample_weight' in fit_kwargs:
|
||||
weight = fit_kwargs['sample_weight']
|
||||
weight_val = None
|
||||
|
@ -259,13 +265,13 @@ def evaluate_model_CV(
|
|||
val_loss_i, train_time_i, train_loss_i, pred_time_i = get_test_loss(
|
||||
estimator, X_train, y_train, X_val, y_val, weight_val,
|
||||
eval_metric, task, labels, budget_per_train,
|
||||
train_loss=train_loss, fit_kwargs=fit_kwargs)
|
||||
log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
|
||||
if weight is not None:
|
||||
fit_kwargs['sample_weight'] = weight
|
||||
valid_fold_num += 1
|
||||
total_fold_num += 1
|
||||
total_val_loss += val_loss_i
|
||||
if train_loss is not False:
|
||||
if log_training_metric or not isinstance(eval_metric, str):
|
||||
if isinstance(total_train_loss, list):
|
||||
total_train_loss = [
|
||||
total_train_loss[i] + v for i, v in enumerate(train_loss_i)]
|
||||
|
@ -286,7 +292,7 @@ def evaluate_model_CV(
|
|||
break
|
||||
val_loss = np.max(val_loss_list)
|
||||
n = total_fold_num
|
||||
if train_loss is not False:
|
||||
if log_training_metric or not isinstance(eval_metric, str):
|
||||
if isinstance(total_train_loss, list):
|
||||
train_loss = [v / n for v in total_train_loss]
|
||||
elif isinstance(total_train_loss, dict):
|
||||
|
@ -294,17 +300,17 @@ def evaluate_model_CV(
|
|||
else:
|
||||
train_loss = total_train_loss / n
|
||||
pred_time /= n
|
||||
budget -= time.time() - start_time
|
||||
if val_loss < best_val_loss and budget > budget_per_train:
|
||||
estimator.cleanup()
|
||||
estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
|
||||
# budget -= time.time() - start_time
|
||||
# if val_loss < best_val_loss and budget > budget_per_train:
|
||||
# estimator.cleanup()
|
||||
# estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
|
||||
return val_loss, train_loss, train_time, pred_time
|
||||
|
||||
|
||||
def compute_estimator(
|
||||
X_train, y_train, X_val, y_val, weight_val, budget, kf,
|
||||
config_dic, task, estimator_name, eval_method, eval_metric,
|
||||
best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
|
||||
best_val_loss=np.Inf, n_jobs=1, estimator_class=None, log_training_metric=False,
|
||||
fit_kwargs={}
|
||||
):
|
||||
estimator_class = estimator_class or get_estimator_class(
|
||||
|
@ -313,7 +319,7 @@ def compute_estimator(
|
|||
**config_dic, task=task, n_jobs=n_jobs)
|
||||
val_loss, train_loss, train_time, pred_time = evaluate_model(
|
||||
estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
|
||||
eval_method, eval_metric, best_val_loss, train_loss=train_loss,
|
||||
eval_method, eval_metric, best_val_loss, log_training_metric=log_training_metric,
|
||||
fit_kwargs=fit_kwargs)
|
||||
return estimator, val_loss, train_loss, train_time, pred_time
|
||||
|
||||
|
|
|
@ -222,10 +222,10 @@ class LGBMEstimator(BaseEstimator):
|
|||
'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
|
||||
'init_value': 0.1,
|
||||
},
|
||||
'subsample': {
|
||||
'domain': tune.uniform(lower=0.1, upper=1.0),
|
||||
'init_value': 1.0,
|
||||
},
|
||||
# 'subsample': {
|
||||
# 'domain': tune.uniform(lower=0.1, upper=1.0),
|
||||
# 'init_value': 1.0,
|
||||
# },
|
||||
'log_max_bin': {
|
||||
'domain': tune.lograndint(lower=3, upper=11),
|
||||
'init_value': 8,
|
||||
|
@ -252,28 +252,30 @@ class LGBMEstimator(BaseEstimator):
|
|||
|
||||
def __init__(self, task='binary:logistic', log_max_bin=8, **params):
|
||||
super().__init__(task, **params)
|
||||
# Default: ‘regression’ for LGBMRegressor,
|
||||
# ‘binary’ or ‘multiclass’ for LGBMClassifier
|
||||
if 'regression' in task:
|
||||
objective = 'regression'
|
||||
elif 'binary' in task:
|
||||
objective = 'binary'
|
||||
elif 'multi' in task:
|
||||
objective = 'multiclass'
|
||||
else:
|
||||
objective = 'regression'
|
||||
if "objective" not in self.params:
|
||||
# Default: ‘regression’ for LGBMRegressor,
|
||||
# ‘binary’ or ‘multiclass’ for LGBMClassifier
|
||||
if 'regression' in task:
|
||||
objective = 'regression'
|
||||
elif 'binary' in task:
|
||||
objective = 'binary'
|
||||
elif 'multi' in task:
|
||||
objective = 'multiclass'
|
||||
else:
|
||||
objective = 'regression'
|
||||
self.params["objective"] = objective
|
||||
if "n_estimators" in self.params:
|
||||
self.params["n_estimators"] = int(round(self.params["n_estimators"]))
|
||||
if "num_leaves" in self.params:
|
||||
self.params["num_leaves"] = int(round(self.params["num_leaves"]))
|
||||
if "min_child_samples" in self.params:
|
||||
self.params["min_child_samples"] = int(round(self.params["min_child_samples"]))
|
||||
if "objective" not in self.params:
|
||||
self.params["objective"] = objective
|
||||
if "max_bin" not in self.params:
|
||||
self.params['max_bin'] = 1 << int(round(log_max_bin)) - 1
|
||||
if "verbose" not in self.params:
|
||||
self.params['verbose'] = -1
|
||||
# if "subsample_freq" not in self.params:
|
||||
# self.params['subsample_freq'] = 1
|
||||
if 'regression' in task:
|
||||
self.estimator_class = LGBMRegressor
|
||||
else:
|
||||
|
|
|
@ -748,6 +748,7 @@ class AutoTransformers:
|
|||
self._set_metric(custom_metric_name, custom_metric_mode_name)
|
||||
self._set_task()
|
||||
self._fp16 = fp16
|
||||
ray.shutdown()
|
||||
ray.init(local_mode=ray_local_mode)
|
||||
self._set_search_space(**custom_hpo_args)
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the
|
||||
* project root for license information.
|
||||
'''
|
||||
from flaml.tune.sample import Domain
|
||||
from typing import Dict, Optional, Tuple
|
||||
import numpy as np
|
||||
try:
|
||||
|
@ -140,7 +141,7 @@ class FLOW2(Searcher):
|
|||
if str(sampler) != 'Normal':
|
||||
self._bounded_keys.append(key)
|
||||
if not hier:
|
||||
self._space_keys = sorted(self._space.keys())
|
||||
self._space_keys = sorted(self._tunable_keys)
|
||||
self._hierarchical = hier
|
||||
if (self.prune_attr and self.prune_attr not in self._space
|
||||
and self.max_resource):
|
||||
|
@ -499,18 +500,28 @@ class FLOW2(Searcher):
|
|||
else:
|
||||
space = self._space
|
||||
value_list = []
|
||||
# self._space_keys doesn't contain keys with const values,
|
||||
# e.g., "eval_metric": ["logloss", "error"].
|
||||
keys = sorted(config.keys()) if self._hierarchical else self._space_keys
|
||||
for key in keys:
|
||||
value = config[key]
|
||||
if key == self.prune_attr:
|
||||
value_list.append(value)
|
||||
# else key must be in self.space
|
||||
# get rid of list type or constant,
|
||||
# e.g., "eval_metric": ["logloss", "error"]
|
||||
elif isinstance(space[key], sample.Integer):
|
||||
value_list.append(int(round(value)))
|
||||
else:
|
||||
value_list.append(value)
|
||||
# key must be in space
|
||||
domain = space[key]
|
||||
if self._hierarchical:
|
||||
# can't remove constant for hierarchical search space,
|
||||
# e.g., learner
|
||||
if not (domain is None or type(domain) in (str, int, float)
|
||||
or isinstance(domain, sample.Domain)):
|
||||
# not domain or hashable
|
||||
# get rid of list type for hierarchical search space.
|
||||
continue
|
||||
if isinstance(domain, sample.Integer):
|
||||
value_list.append(int(round(value)))
|
||||
else:
|
||||
value_list.append(value)
|
||||
return tuple(value_list)
|
||||
|
||||
@property
|
||||
|
|
|
@ -16,7 +16,7 @@ class TrainingLogRecord(object):
|
|||
iter_per_learner: int,
|
||||
logged_metric: float,
|
||||
trial_time: float,
|
||||
total_search_time: float,
|
||||
wall_clock_time: float,
|
||||
validation_loss,
|
||||
config,
|
||||
best_validation_loss,
|
||||
|
@ -27,7 +27,7 @@ class TrainingLogRecord(object):
|
|||
self.iter_per_learner = iter_per_learner
|
||||
self.logged_metric = logged_metric
|
||||
self.trial_time = trial_time
|
||||
self.total_search_time = total_search_time
|
||||
self.wall_clock_time = wall_clock_time
|
||||
self.validation_loss = validation_loss
|
||||
self.config = config
|
||||
self.best_validation_loss = best_validation_loss
|
||||
|
@ -71,7 +71,7 @@ class TrainingLogWriter(object):
|
|||
it_counter: int,
|
||||
train_loss: float,
|
||||
trial_time: float,
|
||||
total_search_time: float,
|
||||
wall_clock_time: float,
|
||||
validation_loss,
|
||||
config,
|
||||
best_validation_loss,
|
||||
|
@ -86,7 +86,7 @@ class TrainingLogWriter(object):
|
|||
it_counter,
|
||||
train_loss,
|
||||
trial_time,
|
||||
total_search_time,
|
||||
wall_clock_time,
|
||||
validation_loss,
|
||||
config,
|
||||
best_validation_loss,
|
||||
|
@ -95,6 +95,7 @@ class TrainingLogWriter(object):
|
|||
sample_size)
|
||||
if validation_loss < self.current_best_loss or \
|
||||
validation_loss == self.current_best_loss and \
|
||||
self.current_sample_size is not None and \
|
||||
sample_size > self.current_sample_size:
|
||||
self.current_best_loss = validation_loss
|
||||
self.current_sample_size = sample_size
|
||||
|
|
|
@ -363,6 +363,7 @@ def indexof(domain: Dict, config: Dict) -> int:
|
|||
continue
|
||||
# print(domain.const[i])
|
||||
if all(config[key] == value for key, value in domain.const[i].items()):
|
||||
# assumption: the concatenation of constants is a unique identifier
|
||||
return i
|
||||
return None
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = "0.5.13"
|
||||
__version__ = "0.6.0"
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -10,7 +10,7 @@ from datetime import datetime
|
|||
from flaml import AutoML
|
||||
from flaml.data import get_output_from_log
|
||||
|
||||
from flaml.model import SKLearnEstimator, XGBoostEstimator
|
||||
from flaml.model import LGBMEstimator, SKLearnEstimator, XGBoostEstimator
|
||||
from rgf.sklearn import RGFClassifier, RGFRegressor
|
||||
from flaml import tune
|
||||
|
||||
|
@ -92,6 +92,24 @@ class MyXGB2(XGBoostEstimator):
|
|||
super().__init__(objective='reg:squarederror', **params)
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
    """LGBM learner with a two-parameter search space that starts large.

    Both ``n_estimators`` and ``num_leaves`` are log-uniform integers in
    [4, 32768], initialised at the upper bound, with the lower bound
    declared as the low-cost starting point.
    """

    @classmethod
    def search_space(cls, **params):
        """Return the search space dict for the two tuned hyperparameters."""
        smallest, largest = 4, 32768
        space = {}
        for hp_name in ('n_estimators', 'num_leaves'):
            # A fresh domain object is built for each hyperparameter.
            space[hp_name] = {
                'domain': tune.lograndint(lower=smallest, upper=largest),
                'init_value': largest,
                'low_cost_init_value': smallest,
            }
        return space
|
||||
|
||||
|
||||
def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
|
||||
weight_test=None, weight_train=None):
|
||||
from sklearn.metrics import log_loss
|
||||
|
@ -477,6 +495,66 @@ class TestAutoML(unittest.TestCase):
|
|||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def test_parallel_xgboost(self, hpo_method=None):
    """Run a short xgboost-only AutoML search with two concurrent trials.

    Args:
        hpo_method: value passed to ``AutoML.fit`` as the ``hpo_method``
            setting. Defaults to ``None``.

    The test is reported as skipped, instead of silently passing, when
    the run raises ``ImportError``.
    """
    automl_experiment = AutoML()
    automl_settings = {
        "time_budget": 10,
        "metric": 'ap',
        "task": 'classification',
        "log_file_name": "test/sparse_classification.log",
        "estimator_list": ["xgboost"],
        "log_type": "all",
        "n_jobs": 1,
        "n_concurrent_trials": 2,
        "hpo_method": hpo_method,
    }
    # Sparse identity matrix as features, random binary labels.
    X_train = scipy.sparse.eye(900000)
    y_train = np.random.randint(2, size=900000)
    try:
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
    except ImportError as err:
        # NOTE(review): presumably raised when the optional backend needed
        # for concurrent trials is not installed -- confirm. Marking the
        # test as skipped keeps that visible in the test report.
        self.skipTest(
            "optional dependency is not installed: {}".format(err))
|
||||
|
||||
def test_parallel_xgboost_random(self):
    """Repeat the parallel xgboost test with random search as hpo_method."""
    self.test_parallel_xgboost('random')
|
||||
|
||||
def test_random_out_of_memory(self):
    """Run random search over the custom ``large_lgbm`` learner.

    ``MyLargeLGBM`` is registered under the name ``large_lgbm`` and is the
    only estimator searched, with two concurrent trials and a 2-second
    time budget.

    The test is reported as skipped, instead of silently passing, when
    the run raises ``ImportError``.
    """
    automl_experiment = AutoML()
    automl_experiment.add_learner(
        learner_name='large_lgbm', learner_class=MyLargeLGBM)
    automl_settings = {
        "time_budget": 2,
        "metric": 'ap',
        "task": 'classification',
        "log_file_name": "test/sparse_classification_oom.log",
        "estimator_list": ["large_lgbm"],
        "log_type": "all",
        "n_jobs": 1,
        "n_concurrent_trials": 2,
        "hpo_method": 'random',
    }

    # Sparse identity matrix as features, random binary labels.
    X_train = scipy.sparse.eye(900000)
    y_train = np.random.randint(2, size=900000)
    try:
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
    except ImportError as err:
        # NOTE(review): presumably raised when the optional backend needed
        # for concurrent trials is not installed -- confirm. Marking the
        # test as skipped keeps that visible in the test report.
        self.skipTest(
            "optional dependency is not installed: {}".format(err))
|
||||
|
||||
def test_sparse_matrix_lr(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
|
|
|
@ -17,6 +17,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
|
|||
"metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
|
||||
"task": 'classification', # task type
|
||||
"log_file_name": 'airlines_experiment.log', # flaml log file
|
||||
"seed": 7654321, # random seed
|
||||
}
|
||||
'''The main flaml automl API'''
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
|
|
@ -45,7 +45,7 @@ class TestLogging(unittest.TestCase):
|
|||
**automl_settings)
|
||||
logger.info(automl.search_space)
|
||||
logger.info(automl.low_cost_partial_config)
|
||||
logger.info(automl.points_to_evalaute)
|
||||
logger.info(automl.points_to_evaluate)
|
||||
logger.info(automl.cat_hp_cost)
|
||||
import optuna as ot
|
||||
study = ot.create_study()
|
||||
|
@ -62,16 +62,18 @@ class TestLogging(unittest.TestCase):
|
|||
config['learner'] = automl.best_estimator
|
||||
automl.trainable({"ml": config})
|
||||
from flaml import tune, CFO
|
||||
from flaml.automl import size
|
||||
from functools import partial
|
||||
search_alg = CFO(
|
||||
metric='val_loss',
|
||||
space=automl.search_space,
|
||||
low_cost_partial_config=automl.low_cost_partial_config,
|
||||
points_to_evaluate=automl.points_to_evalaute,
|
||||
points_to_evaluate=automl.points_to_evaluate,
|
||||
cat_hp_cost=automl.cat_hp_cost,
|
||||
prune_attr=automl.prune_attr,
|
||||
min_resource=automl.min_resource,
|
||||
max_resource=automl.max_resource,
|
||||
config_constraints=[(automl.size, '<=', automl._mem_thres)],
|
||||
config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
|
||||
metric_constraints=automl.metric_constraints)
|
||||
analysis = tune.run(
|
||||
automl.trainable, search_alg=search_alg, # verbose=2,
|
||||
|
|
|
@ -40,6 +40,7 @@ def test_simple(method=None):
|
|||
"n_jobs": 1,
|
||||
"hpo_method": method,
|
||||
"log_type": "all",
|
||||
"retrain_full": "budget",
|
||||
"time_budget": 1
|
||||
}
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
@ -53,21 +54,23 @@ def test_simple(method=None):
|
|||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl.estimator_list)
|
||||
print(automl.search_space)
|
||||
print(automl.points_to_evalaute)
|
||||
print(automl.points_to_evaluate)
|
||||
config = automl.best_config.copy()
|
||||
config['learner'] = automl.best_estimator
|
||||
automl.trainable(config)
|
||||
from flaml import tune
|
||||
from flaml.automl import size
|
||||
from functools import partial
|
||||
analysis = tune.run(
|
||||
automl.trainable, automl.search_space, metric='val_loss', mode="min",
|
||||
low_cost_partial_config=automl.low_cost_partial_config,
|
||||
points_to_evaluate=automl.points_to_evalaute,
|
||||
points_to_evaluate=automl.points_to_evaluate,
|
||||
cat_hp_cost=automl.cat_hp_cost,
|
||||
prune_attr=automl.prune_attr,
|
||||
min_resource=automl.min_resource,
|
||||
max_resource=automl.max_resource,
|
||||
time_budget_s=automl._state.time_budget,
|
||||
config_constraints=[(automl.size, '<=', automl._mem_thres)],
|
||||
config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
|
||||
metric_constraints=automl.metric_constraints, num_samples=5)
|
||||
print(analysis.trials[-1])
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ def test_blendsearch_tune(smoke_test=True):
|
|||
except ImportError:
|
||||
print('ray[tune] is not installed, skipping test')
|
||||
return
|
||||
import numpy as np
|
||||
|
||||
algo = BlendSearch()
|
||||
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
|
@ -42,7 +44,8 @@ def test_blendsearch_tune(smoke_test=True):
|
|||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
# This is an ignored parameter.
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
"activation": tune.choice(["relu", "tanh"]),
|
||||
"test4": np.zeros((3, 1)),
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
|
|
@ -63,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
|
|||
time_budget_s = 60
|
||||
for n_cpu in [4]:
|
||||
start_time = time.time()
|
||||
ray.shutdown()
|
||||
ray.init(num_cpus=n_cpu, num_gpus=0)
|
||||
# ray.init(address='auto')
|
||||
if method == 'BlendSearch':
|
||||
|
|
Loading…
Reference in New Issue