mirror of https://github.com/microsoft/autogen.git
Lgbm w customized obj (#64)
* add customized lgbm learner
* add comments
* fix format issue
* format
* OpenMLError
* add test
* add notebook

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
parent 72d17b37c2
commit 06045703bf
@@ -362,7 +362,6 @@ class XGBoostEstimator(SKLearnEstimator):
     ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
-        self._max_leaves = int(round(max_leaves))
         self.params = {
             'max_leaves': int(round(max_leaves)),
             'max_depth': params.get('max_depth', 0),
@@ -378,6 +377,7 @@ class XGBoostEstimator(SKLearnEstimator):
             'booster': params.get('booster', 'gbtree'),
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
+            'objective': params.get("objective")
         }
         if all_thread:
             del self.params['nthread']
@@ -398,13 +398,19 @@ class XGBoostEstimator(SKLearnEstimator):
         else:
             dtrain = xgb.DMatrix(X_train, label=y_train)
 
-        if self._max_leaves > 0:
-            self._model = xgb.train(self.params, dtrain, self._n_estimators)
-            del dtrain
-            train_time = time.time() - start_time
-            return train_time
-        else:
-            return None
+        objective = self.params.get('objective')
+        if isinstance(objective, str):
+            obj = None
+        else:
+            obj = objective
+            if 'objective' in self.params:
+                del self.params['objective']
+        self._model = xgb.train(self.params, dtrain, self._n_estimators,
+                                obj=obj)
+        self.params['objective'] = objective
+        del dtrain
+        train_time = time.time() - start_time
+        return train_time
 
     def predict(self, X_test):
         if not issparse(X_test):
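The fit change above hinges on XGBoost's low-level training API: a string objective stays inside the params dict, while a callable objective is stripped out of params and handed to xgb.train via its obj argument, which expects a function mapping (raw predictions, DMatrix) to per-example gradient and hessian. A minimal self-contained sketch of that mechanism (illustration only, not FLAML code; the data and the squared_error_obj helper are made up for this example):

import numpy as np
import xgboost as xgb

def squared_error_obj(preds, dtrain):
    # gradient and hessian of 0.5 * (pred - label)**2 w.r.t. pred
    labels = dtrain.get_label()
    grad = preds - labels
    hess = np.ones_like(preds)
    return grad, hess

X = np.random.rand(100, 5)
y = np.random.rand(100)
dtrain = xgb.DMatrix(X, label=y)
# string objectives go inside params; a callable goes to obj= instead
booster = xgb.train({'max_depth': 3}, dtrain, num_boost_round=10,
                    obj=squared_error_obj)
print(booster.predict(dtrain)[:5])  # raw margins when a custom obj is used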
@@ -1 +1 @@
-__version__ = "0.3.0"
+__version__ = "0.3.1"
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -7,7 +7,7 @@ from sklearn.datasets import load_boston, load_iris, load_wine
 from flaml import AutoML
 from flaml.data import get_output_from_log
 
-from flaml.model import SKLearnEstimator
+from flaml.model import SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
 
@@ -65,6 +65,30 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
         return 1.0
 
 
+def logregobj(preds, dtrain):
+    labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
+    grad = preds - labels
+    hess = preds * (1.0 - preds)
+    return grad, hess
+
+
+class MyXGB1(XGBoostEstimator):
+    '''XGBoostEstimator with logregobj as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective=logregobj, **params)
+
+
+class MyXGB2(XGBoostEstimator):
+    '''XGBoostEstimator with 'reg:squarederror' as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective='reg:squarederror', **params)
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
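For reference, logregobj above is the standard logistic-loss objective written against raw margins: with p = sigmoid(margin), the first derivative of the log loss with respect to the margin is p - label and the second derivative is p * (1 - p), exactly the (grad, hess) pair returned. A quick finite-difference check of that gradient (illustration only, not part of the commit):

import numpy as np

def logloss(margin, label):
    # binary cross-entropy evaluated on the raw margin
    p = 1.0 / (1.0 + np.exp(-margin))
    return -(label * np.log(p) + (1 - label) * np.log(1 - p))

margin, label, eps = 0.7, 1.0, 1e-6
numeric = (logloss(margin + eps, label) - logloss(margin - eps, label)) / (2 * eps)
p = 1.0 / (1.0 + np.exp(-margin))
analytic = p - label  # the grad computed in logregobj
print(abs(numeric - analytic))  # ~1e-10: the analytic gradient matches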
@@ -345,6 +369,36 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)
 
+    def test_regression_xgboost(self):
+        X_train = scipy.sparse.random(300, 900, density=0.0001)
+        y_train = np.random.uniform(size=300)
+        X_val = scipy.sparse.random(100, 900, density=0.0001)
+        y_val = np.random.uniform(size=100)
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
+        automl_experiment.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)
+        automl_settings = {
+            "time_budget": 2,
+            "estimator_list": ['my_xgb1', 'my_xgb2'],
+            "task": 'regression',
+            "log_file_name": 'test/regression_xgboost.log',
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        automl_experiment.fit(X_train=X_train, y_train=y_train,
+                              X_val=X_val, y_val=y_val,
+                              **automl_settings)
+        assert automl_experiment._state.X_val.shape == X_val.shape
+        print(automl_experiment.predict(X_train))
+        print(automl_experiment.model)
+        print(automl_experiment.config_history)
+        print(automl_experiment.model_history)
+        print(automl_experiment.best_iteration)
+        print(automl_experiment.best_estimator)
+        print(automl_experiment.best_config)
+        print(automl_experiment.best_loss)
+        print(automl_experiment.best_config_train_time)
+
 
 if __name__ == "__main__":
     unittest.main()
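Outside the test harness, the same registration pattern applies: add_learner makes a custom estimator class available under a name that can then appear in estimator_list. A hedged sketch reusing the MyXGB1 class defined in the test above (the random data and budget are made up for illustration):

import numpy as np
from flaml import AutoML

X_train = np.random.rand(100, 10)
y_train = np.random.rand(100)

automl = AutoML()
# register the custom estimator, then restrict the search to it
automl.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
automl.fit(X_train=X_train, y_train=y_train, task='regression',
           time_budget=10, estimator_list=['my_xgb1'])
print(automl.best_config)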
@@ -44,7 +44,7 @@ def test_simple(method=None):
     }
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except FileNotFoundError:
+    except ValueError:
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(