mirror of https://github.com/microsoft/autogen.git
automl fit with starting points (#141)
* add starting point in fit * add estimator best config * add test * add doc string * when there are multiple points_to_evaluate in CFO, use the best one to start local search; after that use low cost partial config as the start point; then, remove the points whose performance is worse than the converged, and start local search from the remaining ones ordered by their performance. Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
parent
15fd8adac4
commit
e24265ee5d
|
@ -48,7 +48,7 @@ class SearchState:
|
|||
return max(self.time_best_found - self.time_best_found_old,
|
||||
self.total_time_used - self.time_best_found)
|
||||
|
||||
def __init__(self, learner_class, data_size, task):
|
||||
def __init__(self, learner_class, data_size, task, starting_point=None):
|
||||
self.init_eci = learner_class.cost_relative2lgbm()
|
||||
self._search_space_domain = {}
|
||||
self.init_config = {}
|
||||
|
@ -67,8 +67,13 @@ class SearchState:
|
|||
'low_cost_init_value']
|
||||
if 'cat_hp_cost' in space:
|
||||
self.cat_hp_cost[name] = space['cat_hp_cost']
|
||||
# if a starting point is provided, set the init config to be
|
||||
# the starting point provided
|
||||
if starting_point is not None and starting_point.get(name) is not None:
|
||||
self.init_config[name] = starting_point[name]
|
||||
self._hp_names = list(self._search_space_domain.keys())
|
||||
self.search_alg = None
|
||||
self.best_config = None
|
||||
self.best_loss = self.best_loss_old = np.inf
|
||||
self.total_time_used = 0
|
||||
self.total_iter = 0
|
||||
|
@ -328,6 +333,12 @@ class AutoML:
|
|||
'''A dictionary of the best configuration.'''
|
||||
return self._search_states[self._best_estimator].best_config
|
||||
|
||||
@property
def best_config_per_estimator(self):
    '''A dictionary of all estimators' best configuration.

    Keys are the estimator names stored in `self._search_states`; each
    value is the `best_config` attribute of that estimator's search state.
    '''
    configs = {}
    for estimator, search_state in self._search_states.items():
        configs[estimator] = search_state.best_config
    return configs
|
||||
|
||||
@property
|
||||
def best_loss(self):
|
||||
'''A float of the best loss found
|
||||
|
@ -811,6 +822,7 @@ class AutoML:
|
|||
split_type="stratified",
|
||||
learner_selector='sample',
|
||||
hpo_method=None,
|
||||
starting_points={},
|
||||
**fit_kwargs):
|
||||
'''Find a model for a given task
|
||||
|
||||
|
@ -873,11 +885,15 @@ class AutoML:
|
|||
X_val: None or a numpy array or a pandas dataframe of validation data
|
||||
y_val: None or a numpy array or a pandas series of validation labels
|
||||
sample_weight_val: None or a numpy array of the sample weight of
|
||||
validation data
|
||||
validation data.
|
||||
groups: None or an array-like of shape (n,) | Group labels for the
|
||||
samples used while splitting the dataset into train/valid set
|
||||
verbose: int, default=1 | Controls the verbosity, higher means more
|
||||
messages
|
||||
messages.
|
||||
starting_points: A dictionary to specify the starting hyperparameter
|
||||
config for the estimators.
|
||||
Keys are the name of the estimators, and values are the starting
|
||||
hyperparameter configurations for the corresponding estimators.
|
||||
**fit_kwargs: Other key word arguments to pass to fit() function of
|
||||
the searched learners, such as sample_weight
|
||||
'''
|
||||
|
@ -949,6 +965,7 @@ class AutoML:
|
|||
self._search_states[estimator_name] = SearchState(
|
||||
learner_class=estimator_class,
|
||||
data_size=self._state.data_size, task=self._state.task,
|
||||
starting_point=starting_points.get(estimator_name)
|
||||
)
|
||||
logger.info("List of ML learners in AutoML Run: {}".format(
|
||||
estimator_list))
|
||||
|
|
|
@ -132,6 +132,13 @@ class BlendSearch(Searcher):
|
|||
self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
|
||||
else:
|
||||
self._gs = None
|
||||
if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len(
|
||||
points_to_evaluate) > 1:
|
||||
# use the best config in points_to_evaluate as the start point
|
||||
self._candidate_start_points = {}
|
||||
self._started_from_low_cost = not low_cost_partial_config
|
||||
else:
|
||||
self._candidate_start_points = None
|
||||
self._ls = self.LocalSearch(
|
||||
init_config, metric, mode, cat_hp_cost, space, prune_attr,
|
||||
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
|
||||
|
@ -141,27 +148,38 @@ class BlendSearch(Searcher):
|
|||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None,
|
||||
config: Optional[Dict] = None) -> bool:
|
||||
metric_changed = mode_changed = False
|
||||
if metric and self._metric != metric:
|
||||
metric_changed = True
|
||||
self._metric = metric
|
||||
if self._metric_constraints:
|
||||
# metric modified by lagrange
|
||||
metric += self.lagrange
|
||||
# TODO: don't change metric for global search methods that
|
||||
# can handle constraints already
|
||||
if mode and self._mode != mode:
|
||||
mode_changed = True
|
||||
self._mode = mode
|
||||
if not self._ls.space:
|
||||
if metric:
|
||||
self._metric = metric
|
||||
if self._metric_constraints:
|
||||
# metric modified by lagrange
|
||||
metric += self.lagrange
|
||||
# TODO: don't change metric for global search methods that
|
||||
# can handle constraints already
|
||||
if mode:
|
||||
self._mode = mode
|
||||
# the search space can be set only once
|
||||
self._ls.set_search_properties(metric, mode, config)
|
||||
if self._gs is not None:
|
||||
self._gs.set_search_properties(metric, mode, config)
|
||||
self._init_search()
|
||||
if 'time_budget_s' in config:
|
||||
time_budget_s = config['time_budget_s']
|
||||
if time_budget_s is not None:
|
||||
self._deadline = time_budget_s + time.time()
|
||||
SearchThread.set_eps(time_budget_s)
|
||||
if 'metric_target' in config:
|
||||
self._metric_target = config.get('metric_target')
|
||||
elif metric_changed or mode_changed:
|
||||
# reset search when metric or mode changed
|
||||
self._ls.set_search_properties(metric, mode)
|
||||
if self._gs is not None:
|
||||
self._gs.set_search_properties(metric, mode)
|
||||
self._init_search()
|
||||
if config:
|
||||
if 'time_budget_s' in config:
|
||||
time_budget_s = config['time_budget_s']
|
||||
if time_budget_s is not None:
|
||||
self._deadline = time_budget_s + time.time()
|
||||
SearchThread.set_eps(time_budget_s)
|
||||
if 'metric_target' in config:
|
||||
self._metric_target = config.get('metric_target')
|
||||
return True
|
||||
|
||||
def _init_search(self):
|
||||
|
@ -220,6 +238,10 @@ class BlendSearch(Searcher):
|
|||
self._metric_constraints = state._metric_constraints
|
||||
self._metric_constraint_satisfied = state._metric_constraint_satisfied
|
||||
self._metric_constraint_penalty = state._metric_constraint_penalty
|
||||
self._candidate_start_points = state._candidate_start_points
|
||||
if self._candidate_start_points:
|
||||
self._started_from_given = state._started_from_given
|
||||
self._started_from_low_cost = state._started_from_low_cost
|
||||
|
||||
@property
|
||||
def metric_target(self):
|
||||
|
@ -267,25 +289,20 @@ class BlendSearch(Searcher):
|
|||
else: # add to result cache
|
||||
self._result[self._ls.config_signature(config)] = result
|
||||
# update target metric if improved
|
||||
objective = result[
|
||||
self._metric + self.lagrange] if self._metric_constraints \
|
||||
else result[self._metric]
|
||||
objective = result[self._ls.metric]
|
||||
if (objective - self._metric_target) * self._ls.metric_op < 0:
|
||||
self._metric_target = objective
|
||||
if not thread_id and metric_constraint_satisfied \
|
||||
and self._create_condition(result):
|
||||
if thread_id == 0 and metric_constraint_satisfied \
|
||||
and self._create_condition(result):
|
||||
# thread creator
|
||||
self._search_thread_pool[self._thread_count] = SearchThread(
|
||||
self._ls.mode,
|
||||
self._ls.create(
|
||||
config, objective,
|
||||
cost=result.get(self.cost_attr, 1)),
|
||||
self.cost_attr
|
||||
)
|
||||
thread_id = self._thread_count
|
||||
self._thread_count += 1
|
||||
self._update_admissible_region(
|
||||
config, self._ls_bound_min, self._ls_bound_max)
|
||||
self._started_from_given = self._candidate_start_points \
|
||||
and trial_id in self._candidate_start_points
|
||||
if self._started_from_given:
|
||||
del self._candidate_start_points[trial_id]
|
||||
else:
|
||||
self._started_from_low_cost = True
|
||||
self._create_thread(config, result)
|
||||
elif thread_id and not self._metric_constraint_satisfied:
|
||||
# no point has been found to satisfy metric constraint
|
||||
self._expand_admissible_region()
|
||||
|
@ -297,6 +314,19 @@ class BlendSearch(Searcher):
|
|||
# local search thread
|
||||
self._clean(thread_id)
|
||||
|
||||
def _create_thread(self, config, result):
    ''' create a local search thread starting from `config`

    The new thread is stored in the thread pool under the current thread
    count, the count is then incremented, and the admissible region is
    updated with `config`.

    Args:
        config: the configuration to start the local search from
        result: the result dict of `config`; the local search metric is
            read from it, and the cost attribute when present (1 otherwise)
    '''
    mode = self._ls.mode
    objective = result[self._ls.metric]
    cost = result.get(self.cost_attr, 1)
    local_searcher = self._ls.create(config, objective, cost=cost)
    new_thread = SearchThread(mode, local_searcher, self.cost_attr)
    self._search_thread_pool[self._thread_count] = new_thread
    self._thread_count += 1
    self._update_admissible_region(
        config, self._ls_bound_min, self._ls_bound_max)
|
||||
|
||||
def _update_admissible_region(self, config, admissible_min, admissible_max):
|
||||
# update admissible region
|
||||
normalized_config = self._ls.normalize(config)
|
||||
|
@ -315,7 +345,7 @@ class BlendSearch(Searcher):
|
|||
obj_median = np.median(
|
||||
[thread.obj_best1 for id, thread in self._search_thread_pool.items()
|
||||
if id])
|
||||
return result[self._metric] * self._ls.metric_op < obj_median
|
||||
return result[self._ls.metric] * self._ls.metric_op < obj_median
|
||||
|
||||
def _clean(self, thread_id: int):
|
||||
''' delete thread and increase admissible region if converged,
|
||||
|
@ -332,11 +362,47 @@ class BlendSearch(Searcher):
|
|||
if self._inferior(thread_id, id):
|
||||
todelete.add(thread_id)
|
||||
break
|
||||
create_new = False
|
||||
if self._search_thread_pool[thread_id].converged:
|
||||
todelete.add(thread_id)
|
||||
self._expand_admissible_region()
|
||||
if self._candidate_start_points:
|
||||
if not self._started_from_given:
|
||||
# remove start points whose perf is worse than the converged
|
||||
obj = self._search_thread_pool[thread_id].obj_best1
|
||||
worse = [
|
||||
trial_id
|
||||
for trial_id, r in self._candidate_start_points.items()
|
||||
if r and r[self._ls.metric] * self._ls.metric_op >= obj]
|
||||
# logger.info(f"remove candidate start points {worse} than {obj}")
|
||||
for trial_id in worse:
|
||||
del self._candidate_start_points[trial_id]
|
||||
if self._candidate_start_points and self._started_from_low_cost:
|
||||
create_new = True
|
||||
for id in todelete:
|
||||
del self._search_thread_pool[id]
|
||||
if create_new:
|
||||
self._create_thread_from_best_candidate()
|
||||
|
||||
def _create_thread_from_best_candidate(self):
|
||||
# find the best start point
|
||||
best_trial_id = None
|
||||
obj_best = None
|
||||
for trial_id, r in self._candidate_start_points.items():
|
||||
if r and (best_trial_id is None
|
||||
or r[self._ls.metric] * self._ls.metric_op < obj_best):
|
||||
best_trial_id = trial_id
|
||||
obj_best = r[self._ls.metric] * self._ls.metric_op
|
||||
if best_trial_id:
|
||||
# create a new thread
|
||||
config = {}
|
||||
result = self._candidate_start_points[best_trial_id]
|
||||
for key, value in result.items():
|
||||
if key.startswith('config/'):
|
||||
config[key[7:]] = value
|
||||
self._started_from_given = True
|
||||
del self._candidate_start_points[best_trial_id]
|
||||
self._create_thread(config, result)
|
||||
|
||||
def _expand_admissible_region(self):
|
||||
for key in self._ls_bound_max:
|
||||
|
@ -425,6 +491,8 @@ class BlendSearch(Searcher):
|
|||
self._gs_admissible_max.update(self._ls_bound_max)
|
||||
self._result[self._ls.config_signature(config)] = {}
|
||||
else: # use init config
|
||||
if self._candidate_start_points is not None and self._points_to_evaluate:
|
||||
self._candidate_start_points[trial_id] = None
|
||||
init_config = self._points_to_evaluate.pop(
|
||||
0) if self._points_to_evaluate else self._ls.init_config
|
||||
config = self._ls.complete_config(
|
||||
|
@ -624,7 +692,7 @@ class CFO(BlendSearchTuner):
|
|||
# Number of threads is 1 or 2. Thread 0 is a vacuous thread
|
||||
assert len(self._search_thread_pool) < 3, len(self._search_thread_pool)
|
||||
if len(self._search_thread_pool) < 2:
|
||||
# When a local converges, the number of threads is 1
|
||||
# When a local thread converges, the number of threads is 1
|
||||
# Need to restart
|
||||
self._init_used = False
|
||||
return super().suggest(trial_id)
|
||||
|
@ -637,4 +705,28 @@ class CFO(BlendSearchTuner):
|
|||
def _create_condition(self, result: Dict) -> bool:
|
||||
''' create thread condition
|
||||
'''
|
||||
return len(self._search_thread_pool) < 2
|
||||
if self._points_to_evaluate:
|
||||
# still evaluating user-specified init points
|
||||
# we evaluate all candidate start points before we
|
||||
# create the first local search thread
|
||||
return False
|
||||
if len(self._search_thread_pool) == 2:
|
||||
return False
|
||||
if self._candidate_start_points and self._thread_count == 1:
|
||||
# result needs to match or exceed the best candidate start point
|
||||
obj_best = min(
|
||||
self._ls.metric_op * r[self._ls.metric]
|
||||
for r in self._candidate_start_points.values() if r)
|
||||
return result[self._ls.metric] * self._ls.metric_op <= obj_best
|
||||
else:
|
||||
return True
|
||||
|
||||
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
                      error: bool = False):
    ''' record the result of a candidate start point

    After the parent class has processed the trial, the result is stored
    for `trial_id` if it is a candidate start point. When the thread pool
    holds fewer than two threads and no point is left to evaluate, a
    thread is then created from the best candidate.
    '''
    super().on_trial_complete(trial_id, result, error)
    candidates = self._candidate_start_points
    if not candidates or trial_id not in candidates:
        return
    # the trial is a candidate start point
    candidates[trial_id] = result
    if self._points_to_evaluate or len(self._search_thread_pool) >= 2:
        return
    self._create_thread_from_best_candidate()
|
||||
|
|
|
@ -129,7 +129,7 @@ FLOW<sup>2</sup> only requires pairwise comparisons between function values to p
|
|||
1. It is applicable to general black-box functions with a good convergence rate in terms of loss.
|
||||
2. It provides theoretical guarantees on the total evaluation cost incurred.
|
||||
|
||||
The GIFs attached below demostrates an example search trajectory of FLOW<sup>2</sup> shown in the loss and evaluation cost (i.e., the training time ) space respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing good convergence property and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.
|
||||
The GIFs attached below demonstrate an example search trajectory of FLOW<sup>2</sup> shown in the loss and evaluation cost (i.e., the training time) space respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing a good convergence property and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.
|
||||
|
||||
<p align="center">
|
||||
<img align="center" src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center" src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_cost_cfo_12s.gif" width=360>
|
||||
|
|
|
@ -267,8 +267,13 @@ def run(training_function,
|
|||
reduction_factor=reduction_factor,
|
||||
config_constraints=config_constraints,
|
||||
metric_constraints=metric_constraints)
|
||||
else:
|
||||
search_alg.set_search_properties(metric, mode, config)
|
||||
if metric is None or mode is None:
|
||||
metric = metric or search_alg.metric
|
||||
mode = mode or search_alg.mode
|
||||
if time_budget_s:
|
||||
search_alg.set_search_properties(metric, mode, config={
|
||||
search_alg.set_search_properties(None, None, config={
|
||||
'time_budget_s': time_budget_s})
|
||||
scheduler = None
|
||||
if report_intermediate_result:
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = "0.5.10"
|
||||
__version__ = "0.5.11"
|
||||
|
|
|
@ -154,10 +154,10 @@ class TestAutoML(unittest.TestCase):
|
|||
def test_preprocess(self):
|
||||
automl = AutoML()
|
||||
X = pd.DataFrame({
|
||||
'f1': [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
|
||||
'f2': [3., 16., 10., 12., 3., 14., 11., 12., 5., 14., 20., 16., 15., 11.,],
|
||||
'f3': ['a', 'b', 'a', 'c', 'c', 'b', 'b', 'b', 'b', 'a', 'b', 'e', 'e', 'a'],
|
||||
'f4': [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
|
||||
'f1': [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
|
||||
'f2': [3., 16., 10., 12., 3., 14., 11., 12., 5., 14., 20., 16., 15., 11.],
|
||||
'f3': ['a', 'b', 'a', 'c', 'c', 'b', 'b', 'b', 'b', 'a', 'b', 'e', 'e', 'a'],
|
||||
'f4': [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
|
||||
})
|
||||
y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
|
||||
|
||||
|
@ -476,6 +476,53 @@ class TestAutoML(unittest.TestCase):
|
|||
print(automl_experiment.best_loss)
|
||||
print(automl_experiment.best_config_train_time)
|
||||
|
||||
def test_fit_w_starting_point(self, as_frame=True):
    '''Fit AutoML on iris, then fit a second AutoML instance that is
    given the first run's per-estimator best configs as `starting_points`.
    '''

    def report(experiment):
        # print the summary of a finished AutoML run
        val_accuracy = 1.0 - experiment.best_loss
        print('Best ML leaner:', experiment.best_estimator)
        print('Best hyperparmeter config:', experiment.best_config)
        print('Best accuracy on validation data: {0:.4g}'.format(val_accuracy))
        print('Training duration of best run: {0:.4g} s'.format(
            experiment.best_config_train_time))

    automl_experiment = AutoML()
    automl_settings = {
        "time_budget": 3,
        "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": "test/iris.log",
        "log_training_metric": True,
        "n_jobs": 1,
        "model_history": True,
    }
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    if as_frame:
        # test drop column
        X_train.columns = range(X_train.shape[1])
        X_train[X_train.shape[1]] = np.zeros(len(y_train))
    automl_experiment.fit(X_train=X_train, y_train=y_train,
                          **automl_settings)
    report(automl_experiment)

    starting_points = automl_experiment.best_config_per_estimator
    print('starting_points', starting_points)
    # same settings as the first run, with a shorter budget, its own log
    # file, full logging, and the starting points of the first run
    automl_settings_resume = dict(
        automl_settings,
        time_budget=2,
        log_file_name="test/iris_resume.log",
        log_type='all',
        starting_points=starting_points,
    )
    new_automl_experiment = AutoML()
    new_automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings_resume)
    report(new_automl_experiment)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -163,7 +163,7 @@ def _test_xgboost(method='BlendSearch'):
|
|||
|
||||
|
||||
def test_nested():
|
||||
from flaml import tune
|
||||
from flaml import tune, CFO
|
||||
search_space = {
|
||||
# test nested search space
|
||||
"cost_related": {
|
||||
|
@ -178,6 +178,27 @@ def test_nested():
|
|||
tune.report(obj=obj)
|
||||
tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])
|
||||
|
||||
analysis = tune.run(
|
||||
simple_func,
|
||||
search_alg=CFO(
|
||||
space=search_space, metric="obj", mode="min",
|
||||
low_cost_partial_config={
|
||||
"cost_related": {"a": 1}
|
||||
},
|
||||
points_to_evaluate=[
|
||||
{"b": .99, "cost_related": {"a": 3}},
|
||||
{"b": .99, "cost_related": {"a": 2}},
|
||||
{"cost_related": {"a": 8}}
|
||||
],
|
||||
metric_constraints=[("ab", "<=", 4)]),
|
||||
local_dir='logs/',
|
||||
num_samples=-1,
|
||||
time_budget_s=.1)
|
||||
|
||||
best_trial = analysis.get_best_trial()
|
||||
logger.info(f"CFO best config: {best_trial.config}")
|
||||
logger.info(f"CFO best result: {best_trial.last_result}")
|
||||
|
||||
analysis = tune.run(
|
||||
simple_func,
|
||||
config=search_space,
|
||||
|
@ -189,11 +210,11 @@ def test_nested():
|
|||
metric_constraints=[("ab", "<=", 4)],
|
||||
local_dir='logs/',
|
||||
num_samples=-1,
|
||||
time_budget_s=1)
|
||||
time_budget_s=.1)
|
||||
|
||||
best_trial = analysis.get_best_trial()
|
||||
logger.info(f"Best config: {best_trial.config}")
|
||||
logger.info(f"Best result: {best_trial.last_result}")
|
||||
logger.info(f"BlendSearch best config: {best_trial.config}")
|
||||
logger.info(f"BlendSearch best result: {best_trial.last_result}")
|
||||
|
||||
|
||||
def test_xgboost_bs():
|
||||
|
|
Loading…
Reference in New Issue