init value type match (#575)

* init value type match

* bump version to 1.0.6

* add a note about flaml version in notebook

* add note about mismatched ITER_HP

* catch SSLError when accessing OpenML data

* catch errors in autovw test

Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
Chi Wang authored 2022-06-09 08:11:15 -07:00, committed by GitHub
parent 619107edf5
commit 0642b6e7bb
7 changed files with 42 additions and 22 deletions


@@ -89,7 +89,12 @@ class SearchState:
renamed_type = list(
inspect.signature(domain_one_dim.is_valid).parameters.values()
)[0].annotation
- type_match = renamed_type == Any or isinstance(value_one_dim, renamed_type)
+ type_match = (
+     renamed_type == Any
+     or isinstance(value_one_dim, renamed_type)
+     or isinstance(value_one_dim, int)
+     and renamed_type is float
+ )
if not (type_match and domain_one_dim.is_valid(value_one_dim)):
return False
elif value_one_dim != domain_one_dim:
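A minimal standalone sketch of the relaxed check (the function name is illustrative, not FLAML's API): an int init value is now accepted when the domain's validator expects a float, and since `and` binds tighter than `or`, the last two tests pair up as one clause.

from typing import Any

def type_matches(value, annotated_type) -> bool:
    # Accept ints where floats are expected; `and` binds tighter
    # than `or`, so the final two tests form a single clause.
    return (
        annotated_type == Any
        or isinstance(value, annotated_type)
        or isinstance(value, int)
        and annotated_type is float
    )

assert type_matches(20, float)      # int init value, float domain: accepted
assert not type_matches(0.5, int)   # float init value, int domain: rejected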
@@ -1498,6 +1503,10 @@ class AutoML(BaseEstimator):
):
"""Retrain from log file.
This function is intended to retrain the logged configurations.
+ NOTE: In some rare cases, the last config is early stopped to meet time_budget and it's the best config.
+ But the logged config's ITER_HP (e.g., n_estimators) is not reduced.
Args:
log_file_name: A string of the log file name.
X_train: A numpy array or dataframe of training data in shape n*m.
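A hedged usage sketch of the method documented above (the fit settings and toy data are illustrative; a prior fit run is assumed to have produced the log file):

import numpy as np
from flaml import AutoML

X_train = np.random.rand(100, 4)          # n*m training data, per the docstring
y_train = np.random.randint(2, size=100)  # toy binary labels

automl = AutoML()
# First produce a log to retrain from; the budget is illustrative.
automl.fit(X_train, y_train, task="classification", time_budget=10,
           log_file_name="automl.log")
# Then retrain the logged configurations on the same data.
automl.retrain_from_log(log_file_name="automl.log",
                        X_train=X_train, y_train=y_train)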


@@ -948,7 +948,7 @@ class LGBMEstimator(BaseEstimator):
"low_cost_init_value": 4,
},
"min_child_samples": {
"domain": tune.lograndint(lower=2, upper=2 ** 7 + 1),
"domain": tune.lograndint(lower=2, upper=2**7 + 1),
"init_value": 20,
},
"learning_rate": {
@@ -1047,7 +1047,6 @@ class LGBMEstimator(BaseEstimator):
self.params[self.ITER_HP] = 1
self._t1 = self._fit(X_train, y_train, **kwargs)
if budget is not None and self._t1 >= budget or n_iter == 1:
- # self.params[self.ITER_HP] = n_iter
return self._t1
mem1 = psutil.virtual_memory().available if psutil is not None else 1
self._mem1 = mem0 - mem1
@@ -1168,7 +1167,7 @@ class XGBoostEstimator(SKLearnEstimator):
},
"min_child_weight": {
"domain": tune.loguniform(lower=0.001, upper=128),
"init_value": 1,
"init_value": 1.0,
},
"learning_rate": {
"domain": tune.loguniform(lower=1 / 1024, upper=1.0),
@@ -1797,17 +1796,17 @@ class ARIMA(Prophet):
def search_space(cls, **params):
space = {
"p": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 2,
"low_cost_init_value": 0,
},
"d": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 2,
"low_cost_init_value": 0,
},
"q": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 1,
"low_cost_init_value": 0,
},
@@ -1884,32 +1883,32 @@ class SARIMAX(ARIMA):
def search_space(cls, **params):
space = {
"p": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 2,
"low_cost_init_value": 0,
},
"d": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 2,
"low_cost_init_value": 0,
},
"q": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 1,
"low_cost_init_value": 0,
},
"P": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 1,
"low_cost_init_value": 0,
},
"D": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 1,
"low_cost_init_value": 0,
},
"Q": {
"domain": tune.quniform(lower=0, upper=10, q=1),
"domain": tune.qrandint(lower=0, upper=10, q=1),
"init_value": 1,
"low_cost_init_value": 0,
},
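The quniform-to-qrandint switch matters because, in flaml.tune's Ray-Tune-style sampling API, quniform yields quantized floats (e.g. 3.0) while qrandint yields ints (e.g. 3), and ARIMA/SARIMAX order terms must be integers. A small sketch, assuming the Domain.sample() method that API exposes:

from flaml import tune

float_domain = tune.quniform(lower=0, upper=10, q=1)  # samples 0.0, 1.0, ...
int_domain = tune.qrandint(lower=0, upper=10, q=1)    # samples 0, 1, ...

print(type(float_domain.sample()))  # <class 'float'>
print(type(int_domain.sample()))    # <class 'int'>

With the old float domains, an integer init_value like 2 failed the stricter type check in SearchState above, which is the init-config bug the notebook note below warns about.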


@@ -1 +1 @@
- __version__ = "1.0.5"
+ __version__ = "1.0.6"


@@ -131,7 +131,8 @@
}
],
"source": [
"!pip install flaml[notebook,ts_forecast]"
"%pip install flaml[notebook,ts_forecast]\n",
"# avoid version 1.0.2 to 1.0.5 for this notebook due to a bug for arima and sarimax's init config"
]
},
{


@@ -1,6 +1,6 @@
import sys
from openml.exceptions import OpenMLServerException
- from requests.exceptions import ChunkedEncodingError
+ from requests.exceptions import ChunkedEncodingError, SSLError
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
@@ -23,6 +23,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
OpenMLServerException,
ChunkedEncodingError,
urllib3.exceptions.ReadTimeoutError,
+ SSLError,
) as e:
print(e)
return
@@ -110,7 +111,7 @@ def test_mlflow():
X_train, X_test, y_train, y_test = load_openml_task(
task_id=7592, data_dir="test/"
)
- except (OpenMLServerException, ChunkedEncodingError) as e:
+ except (OpenMLServerException, ChunkedEncodingError, SSLError) as e:
print(e)
return
""" import AutoML class from flaml package """


@@ -56,6 +56,7 @@ class TestRegression(unittest.TestCase):
y_pred = automl.predict(X_train)
print(y_pred)
print(automl.model.estimator)
+ n_iter = automl.model.estimator.get_params("n_estimators")
print(automl.config_history)
print(automl.best_model_for_estimator("xgboost"))
print(automl.best_iteration)
@@ -86,7 +87,11 @@ class TestRegression(unittest.TestCase):
)
print(automl.model.estimator)
y_pred2 = automl.predict(X_train)
- assert (y_pred == y_pred2).all()
+ # In some rare cases, the last config is early stopped and it's the best config. But the logged config's n_estimators is not reduced.
+ assert (
+     n_iter != automl.model.estimator.get_params("n_estimators")
+     or (y_pred == y_pred2).all()
+ )
def test_sparse_matrix_regression(self):
X_train = scipy.sparse.random(300, 900, density=0.0001)


@@ -12,6 +12,7 @@ from flaml import AutoVW
import string
import os
import openml
+ from requests.exceptions import SSLError
VW_DS_DIR = "test/data/"
NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
@@ -96,10 +97,14 @@ def shuffle_data(X, y, seed):
def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):
success = False
print("-----getting oml dataset-------", did)
- ds = openml.datasets.get_dataset(did)
- target_attribute = ds.default_target_attribute
- # if target_attribute is None and did in OML_target_attribute_dict:
- #     target_attribute = OML_target_attribute_dict[did]
+ try:
+     ds = openml.datasets.get_dataset(did)
+     target_attribute = ds.default_target_attribute
+     # if target_attribute is None and did in OML_target_attribute_dict:
+     #     target_attribute = OML_target_attribute_dict[did]
+ except (SSLError) as e:
+     print(e)
+     return
print("target=ds.default_target_attribute", target_attribute)
data = ds.get_data(target=target_attribute, dataset_format="array")