mirror of https://github.com/microsoft/autogen.git
add max_depth to xgboost search space (#282)
* add max_depth to xgboost search space
* notebook update
* two learners for xgboost (max_depth or max_leaves)
This commit is contained in:
parent
d937b03e42
commit
ea6d28d7bd
|
@ -79,7 +79,9 @@ class SearchState:
|
|||
self.learner_class = learner_class
|
||||
search_space = learner_class.search_space(data_size=data_size, task=task)
|
||||
for name, space in search_space.items():
|
||||
assert "domain" in space
|
||||
assert (
|
||||
"domain" in space
|
||||
), f"{name}'s domain is missing in the search space spec {space}"
|
||||
self._search_space_domain[name] = space["domain"]
|
||||
if "init_value" in space:
|
||||
self.init_config[name] = space["init_value"]
|
||||
|
@ -434,7 +436,7 @@ class AutoML(BaseEstimator):
|
|||
|
||||
.. code-block:: python
|
||||
|
||||
['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
|
||||
['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']
|
||||
|
||||
time_budget: A float number of the time budget in seconds.
|
||||
Use -1 if no time limit.
|
||||
|
@ -1659,7 +1661,7 @@ class AutoML(BaseEstimator):
|
|||
|
||||
.. code-block:: python
|
||||
|
||||
['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
|
||||
['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']
|
||||
|
||||
time_budget: A float number of the time budget in seconds.
|
||||
Use -1 if no time limit.
|
||||
|
@ -1939,16 +1941,29 @@ class AutoML(BaseEstimator):
|
|||
except ImportError:
|
||||
estimator_list = ["arima", "sarimax"]
|
||||
elif self._state.task == "rank":
|
||||
estimator_list = ["lgbm", "xgboost"]
|
||||
estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"]
|
||||
elif _is_nlp_task(self._state.task):
|
||||
estimator_list = ["transformer"]
|
||||
else:
|
||||
try:
|
||||
import catboost
|
||||
|
||||
estimator_list = ["lgbm", "rf", "catboost", "xgboost", "extra_tree"]
|
||||
estimator_list = [
|
||||
"lgbm",
|
||||
"rf",
|
||||
"catboost",
|
||||
"xgboost",
|
||||
"extra_tree",
|
||||
"xgb_limitdepth",
|
||||
]
|
||||
except ImportError:
|
||||
estimator_list = ["lgbm", "rf", "xgboost", "extra_tree"]
|
||||
estimator_list = [
|
||||
"lgbm",
|
||||
"rf",
|
||||
"xgboost",
|
||||
"extra_tree",
|
||||
"xgb_limitdepth",
|
||||
]
|
||||
if "regression" != self._state.task:
|
||||
estimator_list += ["lrl1"]
|
||||
for estimator_name in estimator_list:
|
||||
|
|
|
@ -20,6 +20,7 @@ from sklearn.metrics import (
|
|||
from sklearn.model_selection import RepeatedStratifiedKFold, GroupKFold, TimeSeriesSplit
|
||||
from .model import (
|
||||
XGBoostSklearnEstimator,
|
||||
XGBoostLimitDepthEstimator,
|
||||
RandomForestEstimator,
|
||||
LGBMEstimator,
|
||||
LRL1Classifier,
|
||||
|
@ -42,6 +43,8 @@ def get_estimator_class(task, estimator_name):
|
|||
# when adding a new learner, need to add an elif branch
|
||||
if "xgboost" == estimator_name:
|
||||
estimator_class = XGBoostSklearnEstimator
|
||||
elif "xgb_limitdepth" == estimator_name:
|
||||
estimator_class = XGBoostLimitDepthEstimator
|
||||
elif "rf" == estimator_name:
|
||||
estimator_class = RandomForestEstimator
|
||||
elif "lgbm" == estimator_name:
|
||||
|
|
|
@ -625,7 +625,13 @@ class LGBMEstimator(BaseEstimator):
|
|||
|
||||
@classmethod
|
||||
def size(cls, config):
|
||||
num_leaves = int(round(config.get("num_leaves") or config["max_leaves"]))
|
||||
num_leaves = int(
|
||||
round(
|
||||
config.get("num_leaves")
|
||||
or config.get("max_leaves")
|
||||
or 1 << config["max_depth"]
|
||||
)
|
||||
)
|
||||
n_estimators = int(round(config["n_estimators"]))
|
||||
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
|
||||
|
||||
|
@ -794,6 +800,10 @@ class XGBoostEstimator(SKLearnEstimator):
|
|||
"init_value": 4,
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"max_depth": {
|
||||
"domain": tune.choice([0, 6, 12]),
|
||||
"init_value": 0,
|
||||
},
|
||||
"min_child_weight": {
|
||||
"domain": tune.loguniform(lower=0.001, upper=128),
|
||||
"init_value": 1,
|
||||
|
@ -834,11 +844,12 @@ class XGBoostEstimator(SKLearnEstimator):
|
|||
|
||||
def config2params(cls, config: dict) -> dict:
|
||||
params = config.copy()
|
||||
params["max_depth"] = params.get("max_depth", 0)
|
||||
params["grow_policy"] = params.get("grow_policy", "lossguide")
|
||||
params["booster"] = params.get("booster", "gbtree")
|
||||
max_depth = params["max_depth"] = params.get("max_depth", 0)
|
||||
if max_depth == 0:
|
||||
params["grow_policy"] = params.get("grow_policy", "lossguide")
|
||||
params["tree_method"] = params.get("tree_method", "hist")
|
||||
# params["booster"] = params.get("booster", "gbtree")
|
||||
params["use_label_encoder"] = params.get("use_label_encoder", False)
|
||||
params["tree_method"] = params.get("tree_method", "hist")
|
||||
if "n_jobs" in config:
|
||||
params["nthread"] = params.pop("n_jobs")
|
||||
return params
|
||||
|
@ -923,24 +934,25 @@ class XGBoostEstimator(SKLearnEstimator):
|
|||
|
||||
|
||||
class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
"""The class for tuning XGBoost (for classification), using sklearn API."""
|
||||
"""The class for tuning XGBoost with unlimited depth, using sklearn API."""
|
||||
|
||||
@classmethod
|
||||
def search_space(cls, data_size, **params):
|
||||
return XGBoostEstimator.search_space(data_size)
|
||||
space = XGBoostEstimator.search_space(data_size)
|
||||
space.pop("max_depth")
|
||||
return space
|
||||
|
||||
@classmethod
|
||||
def cost_relative2lgbm(cls):
|
||||
return XGBoostEstimator.cost_relative2lgbm()
|
||||
|
||||
def config2params(cls, config: dict) -> dict:
|
||||
# TODO: test
|
||||
params = config.copy()
|
||||
params["max_depth"] = 0
|
||||
params["grow_policy"] = params.get("grow_policy", "lossguide")
|
||||
params["booster"] = params.get("booster", "gbtree")
|
||||
max_depth = params["max_depth"] = params.get("max_depth", 0)
|
||||
if max_depth == 0:
|
||||
params["grow_policy"] = params.get("grow_policy", "lossguide")
|
||||
params["tree_method"] = params.get("tree_method", "hist")
|
||||
params["use_label_encoder"] = params.get("use_label_encoder", False)
|
||||
params["tree_method"] = params.get("tree_method", "hist")
|
||||
return params
|
||||
|
||||
def __init__(
|
||||
|
@ -968,6 +980,28 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
|
|||
return XGBoostEstimator._callbacks(start_time, deadline)
|
||||
|
||||
|
||||
class XGBoostLimitDepthEstimator(XGBoostSklearnEstimator):
    """The class for tuning XGBoost with limited depth, using sklearn API."""

    @classmethod
    def search_space(cls, data_size, **params):
        """Return the search space keyed on `max_depth` instead of `max_leaves`.

        The space is obtained from `XGBoostEstimator.search_space(data_size)`,
        its `max_leaves` entry is removed, a `max_depth` entry is installed,
        and the initial values of `learning_rate` and `n_estimators` are
        overridden.

        Args:
            data_size: Forwarded to `XGBoostEstimator.search_space`; its
                base-2 logarithm also determines the `upper` argument of the
                `max_depth` domain.
            **params: Accepted but not used by this method.

        Returns:
            The modified search-space dict.
        """
        search_space = XGBoostEstimator.search_space(data_size)
        del search_space["max_leaves"]

        # `upper` handed to tune.randint: int(log2(data_size)), but never
        # below 6 and never above 16.
        depth_bound = min(max(6, int(np.log2(data_size))), 16)
        search_space["max_depth"] = dict(
            domain=tune.randint(lower=1, upper=depth_bound),
            init_value=6,
            low_cost_init_value=1,
        )

        # Override the starting point of two inherited hyperparameters.
        for hyperparameter, start in (("learning_rate", 0.3), ("n_estimators", 10)):
            search_space[hyperparameter]["init_value"] = start
        return search_space

    @classmethod
    def cost_relative2lgbm(cls):
        """Return the constant 64 as this learner's cost relative to lgbm."""
        relative_cost = 64
        return relative_cost
|
||||
|
||||
|
||||
class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
"""The class for tuning Random Forest."""
|
||||
|
||||
|
|
|
@ -129,11 +129,11 @@ class FLOW2(Searcher):
|
|||
sampler = sampler.get_sampler()
|
||||
if str(sampler) == "Uniform":
|
||||
self._step_lb = min(
|
||||
self._step_lb, q / (domain.upper - domain.lower)
|
||||
self._step_lb, q / (domain.upper - domain.lower + 1)
|
||||
)
|
||||
elif isinstance(domain, sample.Integer) and str(sampler) == "Uniform":
|
||||
self._step_lb = min(
|
||||
self._step_lb, 1.0 / (domain.upper - 1 - domain.lower)
|
||||
self._step_lb, 1.0 / (domain.upper - domain.lower)
|
||||
)
|
||||
if isinstance(domain, sample.Categorical):
|
||||
if not domain.ordered:
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue