mirror of https://github.com/microsoft/autogen.git
* close #249
* admissible region
* best_config can be None
* optional dependency on lgbm and xgb; resolve #252
parent fe65fa143d
commit 524f22bcc5
@@ -395,7 +395,8 @@ class AutoML:
     @property
     def best_config(self):
         """A dictionary of the best configuration."""
-        return self._search_states[self._best_estimator].best_config
+        state = self._search_states.get(self._best_estimator)
+        return state and getattr(state, "best_config", None)

     @property
     def best_config_per_estimator(self):
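Note: the rewritten property is what "best_config can be None" in the commit message refers to. Before any trial finishes, `_best_estimator` is unset, `dict.get` returns None, and `state and getattr(...)` short-circuits to None instead of raising a KeyError. A minimal standalone sketch of the pattern (the toy classes are illustrative, not FLAML's):

```python
class SearchState:
    def __init__(self, best_config=None):
        self.best_config = best_config


class Toy:
    def __init__(self):
        self._search_states = {}
        self._best_estimator = None

    @property
    def best_config(self):
        # None before any trial finishes; a dict afterwards
        state = self._search_states.get(self._best_estimator)
        return state and getattr(state, "best_config", None)


toy = Toy()
assert toy.best_config is None  # nothing trained yet, no exception
toy._best_estimator = "lgbm"
toy._search_states["lgbm"] = SearchState({"n_estimators": 4})
assert toy.best_config == {"n_estimators": 4}
```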
@@ -1104,7 +1105,7 @@ class AutoML:
             (b) otherwise, it is a nested dict with 'ml' as the key, and
                 a list of the low_cost_partial_configs as the value, corresponding
                 to each learner's low_cost_partial_config; the estimator index as
-                an integer corresponding to the cheapest learner is appeneded to the
+                an integer corresponding to the cheapest learner is appended to the
                 list at the end.

         """
@@ -4,12 +4,10 @@
 """

 import numpy as np
-import xgboost as xgb
 import time
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
 from sklearn.linear_model import LogisticRegression
-from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker
 from scipy.sparse import issparse
 import pandas as pd
 from . import tune
@@ -286,10 +284,16 @@ class LGBMEstimator(BaseEstimator):
         if "verbose" not in self.params:
             self.params["verbose"] = -1
         if "regression" == task:
+            from lightgbm import LGBMRegressor
+
             self.estimator_class = LGBMRegressor
         elif "rank" == task:
+            from lightgbm import LGBMRanker
+
             self.estimator_class = LGBMRanker
         else:
+            from lightgbm import LGBMClassifier
+
             self.estimator_class = LGBMClassifier
         self._time_per_iter = None
         self._train_size = 0
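Note: moving `from lightgbm import ...` from module level into the constructor is what makes lightgbm optional: `import flaml` no longer fails when the package is absent, and the ImportError only surfaces when an LGBM estimator is actually instantiated. A hedged sketch of the same pattern with a friendlier error message (the message text is illustrative, not FLAML's):

```python
class LGBMEstimatorSketch:
    def __init__(self, task="binary", **config):
        try:
            # deferred import: only runs when the estimator is constructed
            from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker
        except ImportError as e:
            raise ImportError(
                "lightgbm is required for this estimator; "
                "install it with `pip install lightgbm`"
            ) from e
        if task == "regression":
            self.estimator_class = LGBMRegressor
        elif task == "rank":
            self.estimator_class = LGBMRanker
        else:
            self.estimator_class = LGBMClassifier
```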
@@ -432,6 +436,8 @@ class XGBoostEstimator(SKLearnEstimator):
         self.params["verbosity"] = 0

     def fit(self, X_train, y_train, budget=None, **kwargs):
+        import xgboost as xgb
+
         start_time = time.time()
         if issparse(X_train):
             self.params["tree_method"] = "auto"
@@ -458,6 +464,8 @@ class XGBoostEstimator(SKLearnEstimator):
         return train_time

     def predict(self, X_test):
+        import xgboost as xgb
+
         if not issparse(X_test):
             X_test = self._preprocess(X_test)
         dtest = xgb.DMatrix(X_test)
@@ -492,6 +500,7 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
         super().__init__(task, **config)
         del self.params["verbose"]
         self.params["verbosity"] = 0
+        import xgboost as xgb

         self.estimator_class = xgb.XGBRegressor
         if "rank" == task:
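Note: because the import now lives inside each method, the module-level name `xgb` no longer exists; every method that touches `xgb.DMatrix` or `xgb.train` needs its own local import, which is why both `fit` and `predict` repeat it. A quick sketch of why, assuming xgboost is installed:

```python
import numpy as np


def fit_sketch(X):
    import xgboost as xgb  # local name, visible only inside fit_sketch
    return xgb.DMatrix(X)


def predict_sketch(X):
    # without its own import this would raise NameError: name 'xgb' is not defined
    import xgboost as xgb
    return xgb.DMatrix(X)


fit_sketch(np.ones((2, 3)))
predict_sketch(np.ones((2, 3)))
```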
@@ -313,7 +313,13 @@ class BlendSearch(Searcher):
             {},
             recursive=True,
         )
-        self._ls_bound_max = self._ls_bound_min.copy()
+        self._ls_bound_max = normalize(
+            self._ls.init_config.copy(),
+            self._ls.space,
+            self._ls.init_config,
+            {},
+            recursive=True,
+        )
         self._gs_admissible_min = self._ls_bound_min.copy()
         self._gs_admissible_max = self._ls_bound_max.copy()
         self._result = {}  # config_signature: tuple -> result: Dict
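Note: `_ls_bound_min` and `_ls_bound_max` now both start from the normalized initial configuration, so the local-search admissible region begins as a single point in the unit cube and is widened as trials complete. A rough sketch of the idea, with hypothetical values standing in for `normalize(init_config, ...)`:

```python
# both bounds start at the same normalized init point
ls_bound_min = {"learning_rate": 0.3, "num_leaves": 0.1}
ls_bound_max = dict(ls_bound_min)


def update_admissible_region(normalized_config):
    # each observed config stretches the box to contain it
    for key, v in normalized_config.items():
        ls_bound_min[key] = min(v, ls_bound_min[key])
        ls_bound_max[key] = max(v, ls_bound_max[key])


update_admissible_region({"learning_rate": 0.5, "num_leaves": 0.05})
assert ls_bound_min["num_leaves"] == 0.05
assert ls_bound_max["learning_rate"] == 0.5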
@@ -492,6 +498,11 @@ class BlendSearch(Searcher):
                     subspace[key],
                     domain[choice],
                 )
+                if len(admissible_max[key]) > len(domain.categories):
+                    # points + index
+                    normal = (choice + 0.5) / len(domain.categories)
+                    admissible_max[key][-1] = max(normal, admissible_max[key][-1])
+                    admissible_min[key][-1] = min(normal, admissible_min[key][-1])
             elif isinstance(value, dict):
                 self._update_admissible_region(
                     value,
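Note: for a categorical dimension stored as a "points + index" list, the chosen index is mapped to the midpoint of its slot in [0, 1]: choice `c` out of `n` categories becomes `(c + 0.5) / n`, and the last element of the admissible bounds is stretched to contain it. For example:

```python
n = 4       # len(domain.categories)
choice = 2
normal = (choice + 0.5) / n
assert normal == 0.625  # midpoint of the third of four equal slots
```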
@@ -583,6 +594,7 @@ class BlendSearch(Searcher):
         )

     def _expand_admissible_region(self, lower, upper, space):
+        """expand the admissible region for the subspace `space`"""
         for key in upper:
             ub = upper[key]
             if isinstance(ub, list):
@@ -138,7 +138,7 @@ class ChampionFrontierSearcher(BaseSearcher):

         # value: trial_id, key: searcher_trial_id
         self._trialid_to_searcher_trial_id = {}
-
+
         self._challenger_list = []
         # initialize the search in set_search_properties
         self.set_search_properties(
@@ -6,7 +6,9 @@
 import json
 from typing import IO
 from contextlib import contextmanager
-import warnings
+import logging
+
+logger = logging.getLogger("flaml.automl")


 class TrainingLogRecord(object):
@@ -113,8 +115,8 @@ class TrainingLogWriter(object):
         if self.file is None:
             raise IOError("Call open() to open the outpute file first.")
         if self.current_best_loss_record_id is None:
-            warnings.warn(
-                "checkpoint() called before any record is written, " "skipped."
+            logger.warning(
+                "flaml.training_log: checkpoint() called before any record is written, skipped."
             )
             return
         record = TrainingLogCheckPoint(self.current_best_loss_record_id)
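Note: routing the message through the `flaml.automl` logger instead of `warnings.warn` puts it under the verbosity controls users already have for FLAML's logging, whereas `warnings.warn` fires once per call site and bypasses log handlers. A minimal sketch of how a user can now surface or silence it:

```python
import logging

logging.basicConfig()
logger = logging.getLogger("flaml.automl")
logger.warning("flaml.training_log: checkpoint() called before any record is written, skipped.")
logger.setLevel(logging.ERROR)   # the same call is now filtered out
logger.warning("this one is suppressed")
```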
@ -1,7 +1,7 @@
|
|||
|
||||
try:
|
||||
from ray import __version__ as ray_version
|
||||
assert ray_version >= '1.0.0'
|
||||
|
||||
assert ray_version >= "1.0.0"
|
||||
from ray.tune import sample
|
||||
from ray.tune.suggest.variant_generator import generate_variants
|
||||
except (ImportError, AssertionError):
|
||||
|
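Note: one caveat worth recording here: `ray_version >= "1.0.0"` is a lexicographic string comparison. It happens to work for ray 1.x/2.x, but a hypothetical "10.0.0" would sort before "2.0.0". A tuple comparison is the usual safe form:

```python
assert "10.0.0" < "2.0.0"  # lexicographic order, not numeric


def version_tuple(v: str):
    return tuple(int(x) for x in v.split("."))


assert version_tuple("10.0.0") > version_tuple("2.0.0")
```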
@@ -14,9 +14,7 @@ import logging
 logger = logging.getLogger(__name__)


-def define_by_run_func(
-    trial, space: Dict, path: str = ""
-) -> Optional[Dict[str, Any]]:
+def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]:
     """Define-by-run function to create the search space.

     Returns:
@@ -25,7 +23,7 @@ def define_by_run_func(
     config = {}
     for key, domain in space.items():
         if path:
-            key = path + '/' + key
+            key = path + "/" + key
         if isinstance(domain, dict):
             config.update(define_by_run_func(trial, domain, key))
             continue
@@ -41,40 +39,41 @@ def define_by_run_func(
             logger.warning(
                 "Optuna does not handle quantization in loguniform "
                 "sampling. The parameter will be passed but it will "
-                "probably be ignored.")
+                "probably be ignored."
+            )
     if isinstance(domain, sample.Float):
         if isinstance(sampler, sample.LogUniform):
             if quantize:
                 logger.warning(
                     "Optuna does not support both quantization and "
-                    "sampling from LogUniform. Dropped quantization.")
-            trial.suggest_float(
-                key, domain.lower, domain.upper, log=True)
+                    "sampling from LogUniform. Dropped quantization."
+                )
+            trial.suggest_float(key, domain.lower, domain.upper, log=True)
         elif isinstance(sampler, sample.Uniform):
             if quantize:
-                trial.suggest_float(
-                    key, domain.lower, domain.upper, step=quantize)
+                trial.suggest_float(key, domain.lower, domain.upper, step=quantize)
             trial.suggest_float(key, domain.lower, domain.upper)
     elif isinstance(domain, sample.Integer):
         if isinstance(sampler, sample.LogUniform):
             trial.suggest_int(
-                key, domain.lower,
-                domain.upper - int(bool(not quantize)),
-                log=True)
+                key, domain.lower, domain.upper - int(bool(not quantize)), log=True
+            )
         elif isinstance(sampler, sample.Uniform):
             # Upper bound should be inclusive for quantization and
             # exclusive otherwise
             trial.suggest_int(
-                key, domain.lower,
+                key,
+                domain.lower,
                 domain.upper - int(bool(not quantize)),
-                step=quantize or 1)
+                step=quantize or 1,
+            )
     elif isinstance(domain, sample.Categorical):
         if isinstance(sampler, sample.Uniform):
-            if not hasattr(domain, 'choices'):
+            if not hasattr(domain, "choices"):
                 domain.choices = list(range(len(domain.categories)))
             choices = domain.choices
             # This choice needs to be removed from the final config
-            index = trial.suggest_categorical(key + '_choice_', choices)
+            index = trial.suggest_categorical(key + "_choice_", choices)
             choice = domain.categories[index]
             if isinstance(choice, dict):
                 key += f":{index}"
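Note: the reflow above is black formatting; behavior is unchanged. For context, `define_by_run_func` is meant to be wrapped into an Optuna define-by-run objective, roughly as below. This is a hedged sketch: it assumes the module's `sample` shim provides the usual `loguniform`/`randint` constructors and that optuna is installed; the objective itself is made up.

```python
import optuna

space = {"x": sample.loguniform(1e-4, 1e-1), "y": sample.randint(1, 9)}


def objective(trial):
    define_by_run_func(trial, space)  # registers suggestions on the trial
    config = trial.params             # flattened parameter dict
    return config["x"] * config["y"]


study = optuna.create_study()
study.optimize(objective, n_trials=5)
```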
@@ -84,8 +83,9 @@ def define_by_run_func(
         raise ValueError(
             "Optuna search does not support parameters of type "
             "`{}` with samplers of type `{}`".format(
-                type(domain).__name__,
-                type(domain.sampler).__name__))
+                type(domain).__name__, type(domain.sampler).__name__
+            )
+        )
     # Return all constants in a dictionary.
     return config

@@ -117,18 +117,19 @@ def define_by_run_func(


 def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
-    '''unflatten hierarchical config'''
+    """unflatten hierarchical config"""
     hier = {}
     subspace = {}
     for key, value in config.items():
-        if '/' in key:
-            key = key[key.rfind('/') + 1:]
-        if ':' in key:
-            pos = key.rfind(':')
+        if "/" in key:
+            key = key[key.rfind("/") + 1 :]
+        if ":" in key:
+            pos = key.rfind(":")
             true_key = key[:pos]
-            choice = int(key[pos + 1:])
+            choice = int(key[pos + 1 :])
             hier[true_key], subspace[true_key] = unflatten_hierarchical(
-                value, space[true_key][choice])
+                value, space[true_key][choice]
+            )
         else:
             if key.endswith("_choice_"):
                 key = key[:-8]
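Note: `unflatten_hierarchical` undoes the flattened define-by-run naming: `/` marks nesting, `:<index>` records which category of a parent Categorical was taken, and a trailing `_choice_` suffix carries the chosen index itself. A worked example of just the key surgery (not the full recursion):

```python
key = "ml/learner:0"
key = key[key.rfind("/") + 1 :]  # -> "learner:0"
pos = key.rfind(":")
true_key, choice = key[:pos], int(key[pos + 1 :])
assert (true_key, choice) == ("learner", 0)

key = "x_choice_"
if key.endswith("_choice_"):
    key = key[:-8]  # strip the reserved 8-character suffix
assert key == "x"
```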
@@ -163,8 +164,7 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
         if isinstance(domain, dict):
             low_cost = low_cost_point.get(key, {})
             choice_cost_list = choice_cost.get(key, {})
-            const = add_cost_to_space(
-                domain, low_cost, choice_cost_list)
+            const = add_cost_to_space(domain, low_cost, choice_cost_list)
             if const:
                 config[key] = const
         else:
@@ -172,11 +172,11 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
             continue
         low_cost = low_cost_point.get(key)
         choice_cost_list = choice_cost.get(key)
-        if callable(getattr(domain, 'get_sampler', None)):
+        if callable(getattr(domain, "get_sampler", None)):
             sampler = domain.get_sampler()
             if isinstance(sampler, sample.Quantized):
                 sampler = sampler.get_sampler()
-            domain.bounded = str(sampler) != 'Normal'
+            domain.bounded = str(sampler) != "Normal"
             if isinstance(domain, sample.Categorical):
                 domain.const = []
                 for i, cat in enumerate(domain.categories):
@@ -189,8 +189,9 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
                     choice_cost_dict = choice_cost_list[i]
                 else:
                     choice_cost_dict = {}
-                domain.const.append(add_cost_to_space(
-                    cat, low_cost_dict, choice_cost_dict))
+                domain.const.append(
+                    add_cost_to_space(cat, low_cost_dict, choice_cost_dict)
+                )
             else:
                 domain.const.append(None)
         if choice_cost_list:
@@ -205,8 +206,9 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
             domain.choice_cost = cost[ind]
             domain.const = [domain.const[i] for i in ind]
             domain.ordered = True
-        elif all(isinstance(x, int) or isinstance(x, float)
-                 for x in domain.categories):
+        elif all(
+            isinstance(x, int) or isinstance(x, float) for x in domain.categories
+        ):
             # sort the choices by value
             ind = np.argsort(domain.categories)
             domain.categories = [domain.categories[i] for i in ind]
@@ -214,8 +216,9 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
         else:
             domain.ordered = False
         if low_cost and low_cost not in domain.categories:
-            assert isinstance(low_cost, list), \
-                f"low cost {low_cost} not in domain {domain.categories}"
+            assert isinstance(
+                low_cost, list
+            ), f"low cost {low_cost} not in domain {domain.categories}"
             if domain.ordered:
                 sorted_points = [low_cost[i] for i in ind]
                 for i, point in enumerate(sorted_points):
@@ -231,53 +234,63 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):


 def normalize(
-    config: Dict, space: Dict, reference_config: Dict,
-    normalized_reference_config: Dict, recursive: bool = False,
+    config: Dict,
+    space: Dict,
+    reference_config: Dict,
+    normalized_reference_config: Dict,
+    recursive: bool = False,
 ):
-    '''normalize config in space according to reference_config.
+    """normalize config in space according to reference_config.
     normalize each dimension in config to [0,1].
-    '''
+    """
     config_norm = {}
-    for key in config:
-        value = config[key]
+    for key, value in config.items():
         domain = space.get(key)
         if domain is None:  # e.g., prune_attr
             config_norm[key] = value
             continue
-        if not callable(getattr(domain, 'get_sampler', None)):
+        if not callable(getattr(domain, "get_sampler", None)):
             if recursive and isinstance(domain, dict):
-                config_norm[key] = normalize(
-                    value, domain, reference_config[key], {})
+                config_norm[key] = normalize(value, domain, reference_config[key], {})
             else:
                 config_norm[key] = value
             continue
         # domain: sample.Categorical/Integer/Float/Function
         if isinstance(domain, sample.Categorical):
             norm = None
-            # value is either one category, or the low_cost_point list
+            # value is: a category, a nested dict, or a low_cost_point list
             if value not in domain.categories:
-                # nested, low_cost_point list
-                if recursive:
-                    # nested
-                    norm = []
-                    for i, cat in enumerate(domain.categories):
-                        norm.append(normalize(
-                            value[i], cat, reference_config[key][i], {}))
-                if isinstance(value, list) and len(value) > len(
-                        domain.categories):
+                if isinstance(value, list):
                     # low_cost_point list
-                    index = value[-1]
-                    config[key] = value[index]
-                    value = domain.categories[index]
+                    norm = []
+                    for i, cat in enumerate(domain.categories):
+                        norm.append(
+                            normalize(value[i], cat, reference_config[key][i], {})
+                            if recursive
+                            else value[i]
+                        )
+                    if len(value) > len(domain.categories):
+                        # the low cost index was appended to low_cost_point list
+                        index = value[-1]
+                        value = domain.categories[index]
+                    elif not recursive:
+                        # no low cost index. randomly pick one as init point
+                        continue
                 else:
+                    # nested dict
                     config_norm[key] = value
                     continue
             # normalize categorical
             n = len(domain.categories)
             if domain.ordered:
                 normalized = (domain.categories.index(value) + 0.5) / n
             elif key in normalized_reference_config:
-                normalized = normalized_reference_config[
-                    key] if value == reference_config[key] else (
-                        normalized_reference_config[key] + 1 / n) % 1
+                normalized = (
+                    normalized_reference_config[key]
+                    if value == reference_config[key]
+                    else (normalized_reference_config[key] + 1 / n) % 1
+                )
             else:
                 normalized = 0.5
             if norm:
@@ -294,16 +307,19 @@ def normalize(
             sampler = sampler.get_sampler()
         else:
             quantize = None
-        if str(sampler) == 'LogUniform':
+        if str(sampler) == "LogUniform":
             upper = domain.upper - (
-                isinstance(domain, sample.Integer) & (quantize is None))
+                isinstance(domain, sample.Integer) & (quantize is None)
+            )
             config_norm[key] = np.log(value / domain.lower) / np.log(
-                upper / domain.lower)
-        elif str(sampler) == 'Uniform':
+                upper / domain.lower
+            )
+        elif str(sampler) == "Uniform":
             upper = domain.upper - (
-                isinstance(domain, sample.Integer) & (quantize is None))
+                isinstance(domain, sample.Integer) & (quantize is None)
+            )
             config_norm[key] = (value - domain.lower) / (upper - domain.lower)
-        elif str(sampler) == 'Normal':
+        elif str(sampler) == "Normal":
             # N(mean, sd) -> N(0,1)
             config_norm[key] = (value - sampler.mean) / sampler.sd
         # else:
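Note: the LogUniform branch maps a value v in [lower, upper] to log(v/lower)/log(upper/lower), i.e. its position on a log scale. For example, with lower=1e-4, upper=1e-1, the value 1e-3 lands a third of the way along:

```python
import numpy as np

lower, upper, v = 1e-4, 1e-1, 1e-3
norm = np.log(v / lower) / np.log(upper / lower)
assert abs(norm - 1 / 3) < 1e-6
```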
@@ -312,32 +328,49 @@


 def denormalize(
-    config: Dict, space: Dict, reference_config: Dict,
-    normalized_reference_config: Dict, random_state
+    config: Dict,
+    space: Dict,
+    reference_config: Dict,
+    normalized_reference_config: Dict,
+    random_state,
 ):
     config_denorm = {}
     for key, value in config.items():
         if key in space:
             # domain: sample.Categorical/Integer/Float/Function
             domain = space[key]
-            if not callable(getattr(domain, 'get_sampler', None)):
+            if isinstance(value, dict) or not callable(
+                getattr(domain, "get_sampler", None)
+            ):
                 config_denorm[key] = value
             else:
                 if isinstance(domain, sample.Categorical):
                     # denormalize categorical
                     n = len(domain.categories)
+                    if isinstance(value, list):
+                        # denormalize list
+                        choice = int(np.floor(value[-1] * n))
+                        config_denorm[key] = point = value[choice]
+                        point["_choice_"] = choice
+                        continue
                     if domain.ordered:
                         config_denorm[key] = domain.categories[
-                            min(n - 1, int(np.floor(value * n)))]
+                            min(n - 1, int(np.floor(value * n)))
+                        ]
                     else:
                         assert key in normalized_reference_config
                         if np.floor(value * n) == np.floor(
-                                normalized_reference_config[key] * n):
+                            normalized_reference_config[key] * n
+                        ):
                             config_denorm[key] = reference_config[key]
                         else:  # ****random value each time!****
                             config_denorm[key] = random_state.choice(
-                                [x for x in domain.categories
-                                 if x != reference_config[key]])
+                                [
+                                    x
+                                    for x in domain.categories
+                                    if x != reference_config[key]
+                                ]
+                            )
                     continue
                 # Uniform/LogUniform/Normal/Base
                 sampler = domain.get_sampler()
@@ -348,25 +381,26 @@ def denormalize(
                 else:
                     quantize = None
                 # Handle Log/Uniform
-                if str(sampler) == 'LogUniform':
-                    upper = domain.upper - (isinstance(domain, sample.Integer)
-                                            & (quantize is None))
-                    config_denorm[key] = (
-                        upper / domain.lower) ** value * domain.lower
-                elif str(sampler) == 'Uniform':
-                    upper = domain.upper - (isinstance(domain, sample.Integer)
-                                            & (quantize is None))
-                    config_denorm[key] = value * (
-                        upper - domain.lower) + domain.lower
-                elif str(sampler) == 'Normal':
+                if str(sampler) == "LogUniform":
+                    upper = domain.upper - (
+                        isinstance(domain, sample.Integer) & (quantize is None)
+                    )
+                    config_denorm[key] = (upper / domain.lower) ** value * domain.lower
+                elif str(sampler) == "Uniform":
+                    upper = domain.upper - (
+                        isinstance(domain, sample.Integer) & (quantize is None)
+                    )
+                    config_denorm[key] = value * (upper - domain.lower) + domain.lower
+                elif str(sampler) == "Normal":
                     # denormalization for 'Normal'
                     config_denorm[key] = value * sampler.sd + sampler.mean
                 else:
                     config_denorm[key] = value
                 # Handle quantized
                 if quantize is not None:
-                    config_denorm[key] = np.round(
-                        np.divide(config_denorm[key], quantize)) * quantize
+                    config_denorm[key] = (
+                        np.round(np.divide(config_denorm[key], quantize)) * quantize
+                    )
                 # Handle int (4.6 -> 5)
                 if isinstance(domain, sample.Integer):
                     config_denorm[key] = int(round(config_denorm[key]))
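Note: taken together, `denormalize` is the inverse of `normalize` on the unit cube (up to quantization and integer rounding), which is what lets the local search work in normalized space and hand real configs back to the trainer. The Uniform branch round-trips exactly:

```python
lower, upper = 2.0, 10.0
value = 7.3
norm = (value - lower) / (upper - lower)  # normalize, Uniform sampler
back = norm * (upper - lower) + lower     # denormalize
assert abs(back - value) < 1e-12
```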
@@ -376,9 +410,8 @@ def denormalize(


 def indexof(domain: Dict, config: Dict) -> int:
-    '''find the index of config in domain.categories
-    '''
-    index = config.get('_choice_')
+    """find the index of config in domain.categories"""
+    index = config.get("_choice_")
     if index is not None:
         return index
     if config in domain.categories:
@@ -402,45 +435,64 @@ def indexof(domain: Dict, config: Dict) -> int:


 def complete_config(
-    partial_config: Dict, space: Dict, flow2, disturb: bool = False,
-    lower: Optional[Dict] = None, upper: Optional[Dict] = None
+    partial_config: Dict,
+    space: Dict,
+    flow2,
+    disturb: bool = False,
+    lower: Optional[Dict] = None,
+    upper: Optional[Dict] = None,
 ) -> Tuple[Dict, Dict]:
-    '''Complete partial config in space
+    """Complete partial config in space

     Returns:
         config, space
-    '''
+    """
     config = partial_config.copy()
-    normalized = normalize(config, space, config, {})
+    normalized = normalize(config, space, partial_config, {})
+    # print("normalized", normalized)
     if disturb:
-        for key in normalized:
+        for key, value in normalized.items():
             domain = space.get(key)
-            if getattr(domain, 'ordered', True) is False:
+            if getattr(domain, "ordered", True) is False:
                 # don't change unordered cat choice
                 continue
-            if not callable(getattr(domain, 'get_sampler', None)):
+            if not callable(getattr(domain, "get_sampler", None)):
                 continue
             if upper and lower:
                 up, low = upper[key], lower[key]
-                gauss_std = up - low or flow2.STEPSIZE
-                # allowed bound
-                up += flow2.STEPSIZE
-                low -= flow2.STEPSIZE
+                if isinstance(up, list):
+                    gauss_std = (up[-1] - low[-1]) or flow2.STEPSIZE
+                    up[-1] += flow2.STEPSIZE
+                    low[-1] -= flow2.STEPSIZE
+                else:
+                    gauss_std = (up - low) or flow2.STEPSIZE
+                    # allowed bound
+                    up += flow2.STEPSIZE
+                    low -= flow2.STEPSIZE
             elif domain.bounded:
                 up, low, gauss_std = 1, 0, 1.0
             else:
                 up, low, gauss_std = np.Inf, -np.Inf, 1.0
             if domain.bounded:
-                up = min(up, 1)
-                low = max(low, 0)
+                if isinstance(up, list):
+                    up[-1] = min(up[-1], 1)
+                    low[-1] = max(low[-1], 0)
+                else:
+                    up = min(up, 1)
+                    low = max(low, 0)
             delta = flow2.rand_vector_gaussian(1, gauss_std)[0]
-            normalized[key] = max(low, min(up, normalized[key] + delta))
+            if isinstance(value, list):
+                # points + normalized index
+                value[-1] = max(low[-1], min(up[-1], value[-1] + delta))
+            else:
+                normalized[key] = max(low, min(up, value + delta))
     config = denormalize(normalized, space, config, normalized, flow2._random)
+    # print("denormalized", config)
     for key, value in space.items():
         if key not in config:
             config[key] = value
-    for _, generated in generate_variants({'config': config}):
-        config = generated['config']
+    for _, generated in generate_variants({"config": config}):
+        config = generated["config"]
         break
     subspace = {}
     for key, domain in space.items():
@@ -455,16 +507,26 @@ def complete_config(
             # else:
             #     point = {}
             config[key], subspace[key] = complete_config(
-                value, domain.categories[index], flow2, disturb,
-                lower and lower[key][index], upper and upper[key][index]
+                value,
+                domain.categories[index],
+                flow2,
+                disturb,
+                lower and lower[key][index],
+                upper and upper[key][index],
             )
-            assert '_choice_' not in subspace[key], \
-                "_choice_ is a reserved key for hierarchical search space"
-            subspace[key]['_choice_'] = index
+            assert (
+                "_choice_" not in subspace[key]
+            ), "_choice_ is a reserved key for hierarchical search space"
+            subspace[key]["_choice_"] = index
         else:
             config[key], subspace[key] = complete_config(
-                value, space[key], flow2, disturb,
-                lower and lower[key], upper and upper[key])
+                value,
+                space[key],
+                flow2,
+                disturb,
+                lower and lower[key],
+                upper and upper[key],
+            )
             continue
         subspace[key] = domain
     return config, subspace
@@ -36,9 +36,9 @@
    "cell_type": "code",
    "execution_count": null,
    "source": [
-    "!pip install flaml[notebook];\n",
-    "# from v0.6.6, catboost is made an optional dependency to build conda package.\n",
-    "# to install catboost, you can uncomment and run:\n",
+    "!pip install flaml[notebook];\r\n",
+    "# from v0.6.6, catboost is made an optional dependency to build conda package.\r\n",
+    "# to install catboost, you can uncomment and run:\r\n",
     "# !pip install flaml[catboost]"
    ],
    "outputs": [],
@@ -62,7 +62,7 @@
    "cell_type": "code",
    "execution_count": 1,
    "source": [
-    "from flaml.data import load_openml_dataset\n",
+    "from flaml.data import load_openml_dataset\r\n",
     "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')"
    ],
    "outputs": [
@@ -87,8 +87,8 @@
   {
    "cell_type": "markdown",
    "source": [
-    "### Run FLAML\n",
-    "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. For example, the default ML learners of FLAML are `['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree', 'lrl1']`. "
+    "### Run FLAML\r\n",
+    "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. For example, the default classifiers are `['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree', 'lrl1']`. "
    ],
    "metadata": {
     "slideshow": {
@@ -100,8 +100,8 @@
    "cell_type": "code",
    "execution_count": 2,
    "source": [
-    "''' import AutoML class from flaml package '''\n",
-    "from flaml import AutoML\n",
+    "''' import AutoML class from flaml package '''\r\n",
+    "from flaml import AutoML\r\n",
     "automl = AutoML()"
    ],
    "outputs": [],
@@ -115,13 +115,13 @@
    "cell_type": "code",
    "execution_count": 3,
    "source": [
-    "settings = {\n",
-    "    \"time_budget\": 240,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\n",
-    "                           # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\n",
-    "    \"task\": 'classification',  # task type\n",
-    "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n",
-    "    \"seed\": 7654321,  # random seed\n",
+    "settings = {\r\n",
+    "    \"time_budget\": 240,  # total running time in seconds\r\n",
+    "    \"metric\": 'accuracy',  # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\r\n",
+    "                           # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\r\n",
+    "    \"task\": 'classification',  # task type\r\n",
+    "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\r\n",
+    "    \"seed\": 7654321,  # random seed\r\n",
     "}"
    ],
    "outputs": [],
@@ -135,7 +135,7 @@
    "cell_type": "code",
    "execution_count": 4,
    "source": [
-    "'''The main flaml automl API'''\n",
+    "'''The main flaml automl API'''\r\n",
     "automl.fit(X_train=X_train, y_train=y_train, **settings)"
    ],
    "outputs": [
@@ -330,10 +330,10 @@
    "cell_type": "code",
    "execution_count": 5,
    "source": [
-    "''' retrieve best config and best learner'''\n",
-    "print('Best ML leaner:', automl.best_estimator)\n",
-    "print('Best hyperparmeter config:', automl.best_config)\n",
-    "print('Best accuracy on validation data: {0:.4g}'.format(1-automl.best_loss))\n",
+    "''' retrieve best config and best learner'''\r\n",
+    "print('Best ML leaner:', automl.best_estimator)\r\n",
+    "print('Best hyperparmeter config:', automl.best_config)\r\n",
+    "print('Best accuracy on validation data: {0:.4g}'.format(1-automl.best_loss))\r\n",
     "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))"
    ],
    "outputs": [
@@ -387,9 +387,9 @@
    "cell_type": "code",
    "execution_count": 7,
    "source": [
-    "''' pickle and save the automl object '''\n",
-    "import pickle\n",
-    "with open('automl.pkl', 'wb') as f:\n",
+    "''' pickle and save the automl object '''\r\n",
+    "import pickle\r\n",
+    "with open('automl.pkl', 'wb') as f:\r\n",
     "    pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)"
    ],
    "outputs": [],
@@ -403,10 +403,10 @@
    "cell_type": "code",
    "execution_count": 8,
    "source": [
-    "''' compute predictions of testing dataset ''' \n",
-    "y_pred = automl.predict(X_test)\n",
-    "print('Predicted labels', y_pred)\n",
-    "print('True labels', y_test)\n",
+    "''' compute predictions of testing dataset ''' \r\n",
+    "y_pred = automl.predict(X_test)\r\n",
+    "print('Predicted labels', y_pred)\r\n",
+    "print('True labels', y_test)\r\n",
     "y_pred_proba = automl.predict_proba(X_test)[:,1]"
    ],
    "outputs": [
@@ -442,10 +442,10 @@
    "cell_type": "code",
    "execution_count": 9,
    "source": [
-    "''' compute different metric values on testing dataset'''\n",
-    "from flaml.ml import sklearn_metric_loss_score\n",
-    "print('accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))\n",
-    "print('roc_auc', '=', 1 - sklearn_metric_loss_score('roc_auc', y_pred_proba, y_test))\n",
+    "''' compute different metric values on testing dataset'''\r\n",
+    "from flaml.ml import sklearn_metric_loss_score\r\n",
+    "print('accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))\r\n",
+    "print('roc_auc', '=', 1 - sklearn_metric_loss_score('roc_auc', y_pred_proba, y_test))\r\n",
     "print('log_loss', '=', sklearn_metric_loss_score('log_loss', y_pred_proba, y_test))"
    ],
    "outputs": [
@@ -483,10 +483,10 @@
    "cell_type": "code",
    "execution_count": 10,
    "source": [
-    "from flaml.data import get_output_from_log\n",
-    "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n",
-    "    get_output_from_log(filename=settings['log_file_name'], time_budget=240)\n",
-    "for config in config_history:\n",
+    "from flaml.data import get_output_from_log\r\n",
+    "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\r\n",
+    "    get_output_from_log(filename=settings['log_file_name'], time_budget=240)\r\n",
+    "for config in config_history:\r\n",
     "    print(config)"
    ],
    "outputs": [
@@ -518,14 +518,14 @@
    "cell_type": "code",
    "execution_count": 11,
    "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "\n",
-    "plt.title('Learning Curve')\n",
-    "plt.xlabel('Wall Clock Time (s)')\n",
-    "plt.ylabel('Validation Accuracy')\n",
-    "plt.scatter(time_history, 1 - np.array(valid_loss_history))\n",
-    "plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n",
+    "import matplotlib.pyplot as plt\r\n",
+    "import numpy as np\r\n",
+    "\r\n",
+    "plt.title('Learning Curve')\r\n",
+    "plt.xlabel('Wall Clock Time (s)')\r\n",
+    "plt.ylabel('Validation Accuracy')\r\n",
+    "plt.scatter(time_history, 1 - np.array(valid_loss_history))\r\n",
+    "plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\r\n",
     "plt.show()"
    ],
    "outputs": [
@@ -566,7 +566,7 @@
    "cell_type": "code",
    "execution_count": 12,
    "source": [
-    "from lightgbm import LGBMClassifier\n",
+    "from lightgbm import LGBMClassifier\r\n",
     "lgbm = LGBMClassifier()"
    ],
    "outputs": [],
@@ -612,11 +612,11 @@
    "cell_type": "code",
    "execution_count": 15,
    "source": [
-    "from xgboost import XGBClassifier\n",
-    "xgb = XGBClassifier()\n",
-    "cat_columns = X_train.select_dtypes(include=['category']).columns\n",
-    "X = X_train.copy()\n",
-    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n"
+    "from xgboost import XGBClassifier\r\n",
+    "xgb = XGBClassifier()\r\n",
+    "cat_columns = X_train.select_dtypes(include=['category']).columns\r\n",
+    "X = X_train.copy()\r\n",
+    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\r\n"
    ],
    "outputs": [],
    "metadata": {}
@@ -652,8 +652,8 @@
    "cell_type": "code",
    "execution_count": 17,
    "source": [
-    "X = X_test.copy()\n",
-    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n",
+    "X = X_test.copy()\r\n",
+    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\r\n",
     "y_pred_xgb = xgb.predict(X)"
    ],
    "outputs": [],
@@ -663,8 +663,8 @@
    "cell_type": "code",
    "execution_count": 18,
    "source": [
-    "print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test))\n",
-    "print('default lgbm accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_lgbm, y_test))\n",
+    "print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test))\r\n",
+    "print('default lgbm accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_lgbm, y_test))\r\n",
     "print('flaml (4min) accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))"
    ],
    "outputs": [
@@ -727,73 +727,77 @@
    "cell_type": "code",
    "execution_count": 19,
    "source": [
-    "''' SKLearnEstimator is the super class for a sklearn learner '''\n",
-    "from flaml.model import SKLearnEstimator\n",
-    "from flaml import tune\n",
-    "from rgf.sklearn import RGFClassifier, RGFRegressor\n",
-    "\n",
-    "\n",
-    "class MyRegularizedGreedyForest(SKLearnEstimator):\n",
-    "    def __init__(self, task='binary', **config):\n",
-    "        '''Constructor\n",
-    "        \n",
-    "        Args:\n",
-    "            task: A string of the task type, one of\n",
-    "                'binary', 'multi', 'regression'\n",
-    "            config: A dictionary containing the hyperparameter names\n",
-    "            and 'n_jobs' as keys. n_jobs is the number of parallel threads.\n",
-    "        '''\n",
-    "\n",
-    "        super().__init__(task, **config)\n",
-    "\n",
-    "        '''task=binary or multi for classification task'''\n",
-    "        if task in (\"binary\", \"multi\"):\n",
-    "            self.estimator_class = RGFClassifier\n",
-    "        else:\n",
-    "            self.estimator_class = RGFRegressor\n",
-    "\n",
-    "    @classmethod\n",
-    "    def search_space(cls, data_size, task):\n",
-    "        '''[required method] search space\n",
-    "\n",
-    "        Returns:\n",
-    "            A dictionary of the search space. \n",
-    "            Each key is the name of a hyperparameter, and value is a dict with\n",
-    "            its domain (required) and low_cost_init_value, init_value,\n",
-    "            cat_hp_cost (if applicable).\n",
-    "            e.g.,\n",
-    "            {'domain': tune.randint(lower=1, upper=10), 'init_value': 1}.\n",
-    "        '''\n",
-    "        space = { \n",
-    "            'max_leaf': {'domain': tune.lograndint(lower=4, upper=data_size), 'init_value': 4, 'low_cost_init_value': 4},\n",
-    "            'n_iter': {'domain': tune.lograndint(lower=1, upper=data_size), 'init_value': 1, 'low_cost_init_value': 1},\n",
-    "            'n_tree_search': {'domain': tune.lograndint(lower=1, upper=32768), 'init_value': 1, 'low_cost_init_value': 1},\n",
-    "            'opt_interval': {'domain': tune.lograndint(lower=1, upper=10000), 'init_value': 100},\n",
-    "            'learning_rate': {'domain': tune.loguniform(lower=0.01, upper=20.0)},\n",
-    "            'min_samples_leaf': {'domain': tune.lograndint(lower=1, upper=20), 'init_value': 20},\n",
-    "        }\n",
-    "        return space\n",
-    "\n",
-    "    @classmethod\n",
-    "    def size(cls, config):\n",
-    "        '''[optional method] memory size of the estimator in bytes\n",
-    "        \n",
-    "        Args:\n",
-    "            config - the dict of the hyperparameter config\n",
-    "\n",
-    "        Returns:\n",
-    "            A float of the memory size required by the estimator to train the\n",
-    "            given config\n",
-    "        '''\n",
-    "        max_leaves = int(round(config['max_leaf']))\n",
-    "        n_estimators = int(round(config['n_iter']))\n",
-    "        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8\n",
-    "\n",
-    "    @classmethod\n",
-    "    def cost_relative2lgbm(cls):\n",
-    "        '''[optional method] relative cost compared to lightgbm\n",
-    "        '''\n",
-    "        return 1.0\n"
+    "''' SKLearnEstimator is the super class for a sklearn learner '''\r\n",
+    "from flaml.model import SKLearnEstimator\r\n",
+    "from flaml import tune\r\n",
+    "from flaml.data import CLASSIFICATION\r\n",
+    "\r\n",
+    "\r\n",
+    "class MyRegularizedGreedyForest(SKLearnEstimator):\r\n",
+    "    def __init__(self, task='binary', **config):\r\n",
+    "        '''Constructor\r\n",
+    "        \r\n",
+    "        Args:\r\n",
+    "            task: A string of the task type, one of\r\n",
+    "                'binary', 'multi', 'regression'\r\n",
+    "            config: A dictionary containing the hyperparameter names\r\n",
+    "            and 'n_jobs' as keys. n_jobs is the number of parallel threads.\r\n",
+    "        '''\r\n",
+    "\r\n",
+    "        super().__init__(task, **config)\r\n",
+    "\r\n",
+    "        '''task=binary or multi for classification task'''\r\n",
+    "        if task in CLASSIFICATION:\r\n",
+    "            from rgf.sklearn import RGFClassifier\r\n",
+    "\r\n",
+    "            self.estimator_class = RGFClassifier\r\n",
+    "        else:\r\n",
+    "            from rgf.sklearn import RGFRegressor\r\n",
+    "            \r\n",
+    "            self.estimator_class = RGFRegressor\r\n",
+    "\r\n",
+    "    @classmethod\r\n",
+    "    def search_space(cls, data_size, task):\r\n",
+    "        '''[required method] search space\r\n",
+    "\r\n",
+    "        Returns:\r\n",
+    "            A dictionary of the search space. \r\n",
+    "            Each key is the name of a hyperparameter, and value is a dict with\r\n",
+    "            its domain (required) and low_cost_init_value, init_value,\r\n",
+    "            cat_hp_cost (if applicable).\r\n",
+    "            e.g.,\r\n",
+    "            {'domain': tune.randint(lower=1, upper=10), 'init_value': 1}.\r\n",
+    "        '''\r\n",
+    "        space = { \r\n",
+    "            'max_leaf': {'domain': tune.lograndint(lower=4, upper=data_size), 'init_value': 4, 'low_cost_init_value': 4},\r\n",
+    "            'n_iter': {'domain': tune.lograndint(lower=1, upper=data_size), 'init_value': 1, 'low_cost_init_value': 1},\r\n",
+    "            'n_tree_search': {'domain': tune.lograndint(lower=1, upper=32768), 'init_value': 1, 'low_cost_init_value': 1},\r\n",
+    "            'opt_interval': {'domain': tune.lograndint(lower=1, upper=10000), 'init_value': 100},\r\n",
+    "            'learning_rate': {'domain': tune.loguniform(lower=0.01, upper=20.0)},\r\n",
+    "            'min_samples_leaf': {'domain': tune.lograndint(lower=1, upper=20), 'init_value': 20},\r\n",
+    "        }\r\n",
+    "        return space\r\n",
+    "\r\n",
+    "    @classmethod\r\n",
+    "    def size(cls, config):\r\n",
+    "        '''[optional method] memory size of the estimator in bytes\r\n",
+    "        \r\n",
+    "        Args:\r\n",
+    "            config - the dict of the hyperparameter config\r\n",
+    "\r\n",
+    "        Returns:\r\n",
+    "            A float of the memory size required by the estimator to train the\r\n",
+    "            given config\r\n",
+    "        '''\r\n",
+    "        max_leaves = int(round(config['max_leaf']))\r\n",
+    "        n_estimators = int(round(config['n_iter']))\r\n",
+    "        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8\r\n",
+    "\r\n",
+    "    @classmethod\r\n",
+    "    def cost_relative2lgbm(cls):\r\n",
+    "        '''[optional method] relative cost compared to lightgbm\r\n",
+    "        '''\r\n",
+    "        return 1.0\r\n"
    ],
    "outputs": [],
    "metadata": {
@@ -819,7 +823,7 @@
    "cell_type": "code",
    "execution_count": 20,
    "source": [
-    "automl = AutoML()\n",
+    "automl = AutoML()\r\n",
     "automl.add_learner(learner_name='RGF', learner_class=MyRegularizedGreedyForest)"
    ],
    "outputs": [],
@@ -833,15 +837,15 @@
    "cell_type": "code",
    "execution_count": 21,
    "source": [
-    "settings = {\n",
-    "    \"time_budget\": 10,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy', \n",
-    "    \"estimator_list\": ['RGF', 'lgbm', 'rf', 'xgboost'],  # list of ML learners\n",
-    "    \"task\": 'classification',  # task type \n",
-    "    \"log_file_name\": 'airlines_experiment_custom_learner.log',  # flaml log file \n",
-    "    \"log_training_metric\": True,  # whether to log training metric\n",
-    "}\n",
-    "\n",
+    "settings = {\r\n",
+    "    \"time_budget\": 10,  # total running time in seconds\r\n",
+    "    \"metric\": 'accuracy', \r\n",
+    "    \"estimator_list\": ['RGF', 'lgbm', 'rf', 'xgboost'],  # list of ML learners\r\n",
+    "    \"task\": 'classification',  # task type \r\n",
+    "    \"log_file_name\": 'airlines_experiment_custom_learner.log',  # flaml log file \r\n",
+    "    \"log_training_metric\": True,  # whether to log training metric\r\n",
+    "}\r\n",
+    "\r\n",
     "automl.fit(X_train = X_train, y_train = y_train, **settings)"
    ],
    "outputs": [
@@ -13,10 +13,9 @@ import pandas as pd
 from datetime import datetime

 from flaml import AutoML
-from flaml.data import get_output_from_log
+from flaml.data import CLASSIFICATION, get_output_from_log

 from flaml.model import LGBMEstimator, SKLearnEstimator, XGBoostEstimator
-from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
 from flaml.training_log import training_log_reader

@@ -26,9 +25,13 @@ class MyRegularizedGreedyForest(SKLearnEstimator):

         super().__init__(task, **config)

-        if task in ("binary", "multi"):
+        if task in CLASSIFICATION:
+            from rgf.sklearn import RGFClassifier
+
             self.estimator_class = RGFClassifier
         else:
+            from rgf.sklearn import RGFRegressor
+
             self.estimator_class = RGFRegressor

     @classmethod
@@ -628,7 +631,7 @@ class TestAutoML(unittest.TestCase):
             "log_file_name": "test/california.log",
             "log_type": "all",
             "n_jobs": 1,
-            "n_concurrent_trials": 2,
+            "n_concurrent_trials": 10,
             "hpo_method": hpo_method,
         }
         X_train, y_train = fetch_california_housing(return_X_y=True)
@@ -109,4 +109,4 @@ def test_mlflow():


 if __name__ == "__main__":
-    test_automl(300)
+    test_automl(120)
@@ -64,18 +64,20 @@ class TestLogging(unittest.TestCase):
             automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost
         )
         logger.info(automl.search_space["ml"].categories)
-        config = automl.best_config.copy()
-        config["learner"] = automl.best_estimator
-        automl.trainable({"ml": config})
+        if automl.best_config:
+            config = automl.best_config.copy()
+            config["learner"] = automl.best_estimator
+            automl.trainable({"ml": config})
         from flaml import tune, BlendSearch
         from flaml.automl import size
         from functools import partial

+        low_cost_partial_config = automl.low_cost_partial_config
         search_alg = BlendSearch(
             metric="val_loss",
             mode="min",
             space=automl.search_space,
-            low_cost_partial_config=automl.low_cost_partial_config,
+            low_cost_partial_config=low_cost_partial_config,
             points_to_evaluate=automl.points_to_evaluate,
             cat_hp_cost=automl.cat_hp_cost,
             prune_attr=automl.prune_attr,
@@ -95,6 +97,14 @@ class TestLogging(unittest.TestCase):
         print(min(trial.last_result["val_loss"] for trial in analysis.trials))
         config = analysis.trials[-1].last_result["config"]["ml"]
         automl._state._train_with_config(config["learner"], config)
+        for _ in range(3):
+            print(
+                search_alg._ls.complete_config(
+                    low_cost_partial_config,
+                    search_alg._ls_bound_min,
+                    search_alg._ls_bound_max,
+                )
+            )
         # Check if the log buffer is populated.
         self.assertTrue(len(buf.getvalue()) > 0)
