Lgbm w customized obj (#64)

* add customized lgbm learner

* add comments

* fix format issue

* format

* OpenMLError

* add test

* add notebook

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Qingyun Wu 2021-04-10 21:14:28 -04:00 committed by GitHub
parent 72d17b37c2
commit 06045703bf
6 changed files with 365 additions and 392 deletions
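
In short, this change lets XGBoostEstimator accept a callable objective, so a custom learner is defined by subclassing it and handing the callable to the constructor. A minimal usage sketch, assembled from the test code added below (the data and time budget are illustrative, not from the commit):

import numpy as np
from flaml import AutoML
from flaml.model import XGBoostEstimator

def logregobj(preds, dtrain):
    # gradient and hessian of the logistic loss w.r.t. the raw scores
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess

class MyXGB(XGBoostEstimator):
    def __init__(self, **params):
        super().__init__(objective=logregobj, **params)

automl = AutoML()
automl.add_learner(learner_name='my_xgb', learner_class=MyXGB)
# X_train, y_train: any regression data, e.g. numpy arrays
automl.fit(X_train=X_train, y_train=y_train, task='regression',
           estimator_list=['my_xgb'], time_budget=10)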


@@ -362,7 +362,6 @@ class XGBoostEstimator(SKLearnEstimator):
     ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
-        self._max_leaves = int(round(max_leaves))
         self.params = {
             'max_leaves': int(round(max_leaves)),
             'max_depth': params.get('max_depth', 0),
@@ -378,6 +377,7 @@ class XGBoostEstimator(SKLearnEstimator):
             'booster': params.get('booster', 'gbtree'),
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
+            'objective': params.get("objective")
         }
         if all_thread:
             del self.params['nthread']
@@ -398,13 +398,19 @@ class XGBoostEstimator(SKLearnEstimator):
         else:
             dtrain = xgb.DMatrix(X_train, label=y_train)
 
-        if self._max_leaves > 0:
-            self._model = xgb.train(self.params, dtrain, self._n_estimators)
-            del dtrain
-            train_time = time.time() - start_time
-            return train_time
-        else:
-            return None
+        objective = self.params.get('objective')
+        if isinstance(objective, str):
+            obj = None
+        else:
+            obj = objective
+            if 'objective' in self.params:
+                del self.params['objective']
+        self._model = xgb.train(self.params, dtrain, self._n_estimators,
+                                obj=obj)
+        self.params['objective'] = objective
+        del dtrain
+        train_time = time.time() - start_time
+        return train_time
 
     def predict(self, X_test):
         if not issparse(X_test):
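
Note on the 'objective' juggling above: xgb.train expects plain values (strings and numbers) in its params dict, while a callable objective must go through the separate obj argument, so fit pops the callable before training and restores it afterwards to keep the estimator's config intact. A standalone sketch of the underlying xgboost API, with toy data:

import numpy as np
import xgboost as xgb

def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the raw margins
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess

X = np.random.rand(200, 5)
y = (np.random.rand(200) > 0.5).astype(float)
dtrain = xgb.DMatrix(X, label=y)
# the callable goes to obj=, never into the params dict
booster = xgb.train({'max_depth': 3}, dtrain, num_boost_round=10, obj=logregobj)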


@@ -1 +1 @@
-__version__ = "0.3.0"
+__version__ = "0.3.1"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -7,7 +7,7 @@ from sklearn.datasets import load_boston, load_iris, load_wine
 
 from flaml import AutoML
 from flaml.data import get_output_from_log
 
-from flaml.model import SKLearnEstimator
+from flaml.model import SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
@@ -65,6 +65,30 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
         return 1.0
 
 
+def logregobj(preds, dtrain):
+    labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
+    grad = preds - labels
+    hess = preds * (1.0 - preds)
+    return grad, hess
+
+
+class MyXGB1(XGBoostEstimator):
+    '''XGBoostEstimator with logregobj as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective=logregobj, **params)
+
+
+class MyXGB2(XGBoostEstimator):
+    '''XGBoostEstimator with 'reg:squarederror' as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective='reg:squarederror', **params)
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
@@ -345,6 +369,36 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)
 
+    def test_regression_xgboost(self):
+        X_train = scipy.sparse.random(300, 900, density=0.0001)
+        y_train = np.random.uniform(size=300)
+        X_val = scipy.sparse.random(100, 900, density=0.0001)
+        y_val = np.random.uniform(size=100)
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
+        automl_experiment.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)
+        automl_settings = {
+            "time_budget": 2,
+            "estimator_list": ['my_xgb1', 'my_xgb2'],
+            "task": 'regression',
+            "log_file_name": 'test/regression_xgboost.log',
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        automl_experiment.fit(X_train=X_train, y_train=y_train,
+                              X_val=X_val, y_val=y_val,
+                              **automl_settings)
+        assert automl_experiment._state.X_val.shape == X_val.shape
+        print(automl_experiment.predict(X_train))
+        print(automl_experiment.model)
+        print(automl_experiment.config_history)
+        print(automl_experiment.model_history)
+        print(automl_experiment.best_iteration)
+        print(automl_experiment.best_estimator)
+        print(automl_experiment.best_config)
+        print(automl_experiment.best_loss)
+        print(automl_experiment.best_config_train_time)
+
 
 if __name__ == "__main__":
     unittest.main()
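
One caveat when reading the custom-objective tests, standard XGBoost behavior rather than something this commit changes: a booster trained with a callable objective predicts raw margin scores, so logistic-style objectives like logregobj need the sigmoid applied by hand to recover probabilities. Continuing the xgb.train sketch above:

raw = booster.predict(dtrain)        # raw margins, not probabilities
proba = 1.0 / (1.0 + np.exp(-raw))   # the same transform logregobj uses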


@@ -44,7 +44,7 @@ def test_simple(method=None):
     }
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except FileNotFoundError:
+    except ValueError:
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
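
Context for the except change, an assumption about the motivation rather than something stated in the diff: the sklearn versions targeted here signal an unresolvable OpenML dataset with ValueError from fetch_openml, not FileNotFoundError, so the handler is broadened to keep the wine-dataset fallback reachable. The pattern in isolation (dataset name illustrative):

from sklearn.datasets import fetch_openml, load_wine

try:
    X, y = fetch_openml(name='credit-g', return_X_y=True)
except ValueError:  # dataset missing or name unresolved
    X, y = load_wine(return_X_y=True)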