mirror of https://github.com/microsoft/autogen.git

V0.2.2 (#19)

* v0.2.2
  * separate the HPO part into the module flaml.tune
  * enhanced implementation of FLOW^2, CFO and BlendSearch
  * support parallel tuning using ray tune
  * add support for sample_weight and generic fit arguments
  * enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>

This commit is contained in:
parent 53e300ae02
commit 776aa55189
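As a quick illustration of the sample_weight support listed in the commit message, the sketch below passes per-sample weights through AutoML.fit. It assumes fit arguments are forwarded to the underlying learner as this commit describes; the weight values themselves are illustrative.

```python
import numpy as np
from sklearn.datasets import load_iris
from flaml import AutoML

X_train, y_train = load_iris(return_X_y=True)
# Illustrative per-sample weights; any array accepted by the underlying
# learner's fit() should work, since generic fit arguments are forwarded.
weights = np.ones(len(y_train))

automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, task="classification",
           time_budget=10, sample_weight=weights)
```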
@@ -38,6 +38,10 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -e .[test]
+    - name: If linux or mac, install ray
+      if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
+      run: |
+        pip install -e .[ray]
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
@@ -148,3 +148,4 @@ dmypy.json
 cython_debug/
 /catboost_info
 notebook/*.pkl
+notebook/.azureml
LICENSE (19 lines changed)
@@ -19,3 +19,22 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE
+
+-------------
+Code in tune/[analysis.py, sample.py, trial.py] and
+searcher/[suggestion.py, variant_generator.py] is adapted from
+https://github.com/ray-project/ray/blob/master/python/ray/tune/
+
+# Copyright 2020 The Ray Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
README.md (56 lines changed)
@@ -1,12 +1,17 @@
 # FLAML - Fast and Lightweight AutoML
+
+<p align="center">
+    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/FLAML.png" width=200>
+    <br>
+</p>
 
 FLAML is a Python library designed to automatically produce accurate machine
 learning models with low computational cost. It frees users from selecting
 learners and hyperparameters for each learner. It is fast and cheap.
 The simple and lightweight design makes it easy to extend, such as
-adding customized learners or metrics. FLAML is powered by a new, cost-effective
-hyperparameter optimization and learner selection method invented by
-Microsoft Research.
+adding customized learners or metrics. FLAML is powered by a new, [cost-effective
+hyperparameter optimization](https://github.com/microsoft/FLAML/tree/main/flaml/tune)
+and learner selection method invented by Microsoft Research.
 FLAML is easy to use:
 
 * With three lines of code, you can start using this economical and fast
@@ -23,10 +28,10 @@ tool for XGBoost, LightGBM, Random Forest etc. or a customized learner.
 automl.fit(X_train, y_train, task="classification", estimator_list=["lgbm"])
 ```
 
 * You can embed FLAML in self-tuning software for just-in-time tuning with
 low latency & resource consumption.
+* You can also run generic ray-tune style hyperparameter tuning for a custom function.
 ```python
-automl.fit(X_train, y_train, task="regression", time_budget=60)
+from flaml import tune
+tune.run(train_with_config, config={…}, init_config={…}, time_budget_s=3600)
 ```
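The config arguments in the added line above are elided in the README itself; the following is a self-contained sketch of this ray-tune style API, under the assumption that flaml.tune's report, uniform, and loguniform follow the ray.tune conventions the module is adapted from. The toy objective and the ranges are illustrative, not from the diff.

```python
from flaml import tune

def train_with_config(config):
    # Stand-in for real training: compute and report a loss
    # for this hyperparameter configuration.
    loss = (config["x"] - 3) ** 2 + config["y"]
    tune.report(loss=loss)

analysis = tune.run(
    train_with_config,
    config={                          # illustrative search space
        "x": tune.uniform(0, 10),
        "y": tune.loguniform(1e-4, 1),
    },
    init_config={"x": 0, "y": 1e-4},  # low-cost starting point for CFO/FLOW^2
    metric="loss", mode="min",
    time_budget_s=60,
)
```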
## Installation

@@ -51,9 +56,9 @@ A basic classification example.
 ```python
 from flaml import AutoML
 from sklearn.datasets import load_iris
-# Initialize the FLAML learner.
+# Initialize an AutoML instance
 automl = AutoML()
-# Provide configurations.
+# Specify automl goal and constraint
 automl_settings = {
     "time_budget": 10,  # in seconds
     "metric": 'accuracy',
@@ -61,12 +66,12 @@ automl_settings = {
     "log_file_name": "test/iris.log",
 }
 X_train, y_train = load_iris(return_X_y=True)
-# Train with labeled input data.
+# Train with labeled input data
 automl.fit(X_train=X_train, y_train=y_train,
            **automl_settings)
 # Predict
 print(automl.predict_proba(X_train))
-# Export the best model.
+# Export the best model
 print(automl.model)
 ```
 
@@ -75,9 +80,9 @@ A basic regression example.
 ```python
 from flaml import AutoML
 from sklearn.datasets import load_boston
-# Initialize the FLAML learner.
+# Initialize an AutoML instance
 automl = AutoML()
-# Provide configurations.
+# Specify automl goal and constraint
 automl_settings = {
     "time_budget": 10,  # in seconds
     "metric": 'r2',
@@ -85,25 +90,39 @@ automl_settings = {
     "log_file_name": "test/boston.log",
 }
 X_train, y_train = load_boston(return_X_y=True)
-# Train with labeled input data.
+# Train with labeled input data
 automl.fit(X_train=X_train, y_train=y_train,
            **automl_settings)
 # Predict
 print(automl.predict(X_train))
-# Export the best model.
+# Export the best model
 print(automl.model)
 ```
 
-More examples: see the [notebook](https://github.com/microsoft/FLAML/tree/main/notebook/flaml_demo.ipynb)
+More examples can be found in [notebooks](https://github.com/microsoft/FLAML/tree/main/notebook/).
 
 ## Documentation
 
 The API documentation is [here](https://microsoft.github.io/FLAML/).
 
 Read more about the hyperparameter optimization methods in FLAML
 [here](https://github.com/microsoft/FLAML/tree/main/flaml/tune). They can be
 used beyond the AutoML context, and in distributed HPO frameworks such as
 Ray Tune or NNI.

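For example, the searchers can serve as a drop-in search_alg for Ray Tune. This is a minimal sketch, assuming flaml's CFO implements ray.tune's Searcher interface (consistent with the adapted suggestion.py noted in the LICENSE); the objective and search space are illustrative.

```python
from ray import tune as raytune
from flaml import CFO

def objective(config):
    # Illustrative objective: minimized at x == 2.
    raytune.report(loss=(config["x"] - 2) ** 2)

raytune.run(
    objective,
    config={"x": raytune.uniform(0, 10)},   # illustrative search space
    search_alg=CFO(metric="loss", mode="min"),
    num_samples=20,
)
```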
For more technical details, please check our papers.

-* [FLAML: A Fast and Lightweight AutoML Library](https://arxiv.org/abs/1911.04706). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. arXiv:1911.04706, 2020.
-* [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. To appear in AAAI 2021.
+* [FLAML: A Fast and Lightweight AutoML Library](https://arxiv.org/abs/1911.04706). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. To appear in MLSys, 2021.
+```
+@inproceedings{wang2021flaml,
+    title={FLAML: A Fast and Lightweight AutoML Library},
+    author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},
+    year={2021},
+    booktitle={MLSys},
+}
+```
+* [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.
+* Economical Hyperparameter Optimization With Blended Search Strategy. Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. To appear in ICLR 2021.

## Contributing

@@ -123,9 +142,8 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
 
 * Chi Wang
 * Qingyun Wu
 * Erkang Zhu
 
-Contributors: Markus Weimer, Silu Huang, Haozhe Zhang, Alex Deng.
+Contributors (alphabetical order): Alex Deng, Silu Huang, John Langford, Amin Saied, Markus Weimer, Haozhe Zhang, Erkang Zhu.
 
 ## License
Five binary files added (images: 13 KiB, 2.8 KiB, 36 KiB, 8.9 MiB, 8.1 MiB); contents not shown.
@@ -1,3 +1,4 @@
+from flaml.searcher import CFO, BlendSearch, FLOW2
 from flaml.automl import AutoML
 from flaml.version import __version__
 import logging
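The new top-level exports can be smoke-tested directly; this snippet assumes only the import layout shown in the hunk above.

```python
from flaml import AutoML, CFO, BlendSearch, FLOW2
print(AutoML, CFO, BlendSearch, FLOW2)  # all importable from the package root
```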
|
2121
flaml/automl.py
2121
flaml/automl.py
File diff suppressed because it is too large
Load Diff
|
@@ -1,31 +1,13 @@
-'''!
- * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License.
-'''
-
-N_SPLITS = 5
-RANDOM_SEED = 1
-SPLIT_RATIO = 0.1
-HISTORY_SIZE = 10000000
-MEM_THRES = 4*(1024**3)
-SMALL_LARGE_THRES = 10000000
-MIN_SAMPLE_TRAIN = 10000
-MIN_SAMPLE_VAL = 10000
-CV_HOLDOUT_THRESHOLD = 100000
-
-BASE_Const = 2
-BASE_LOWER_BOUND = 2**(0.01)
-
-ETI_INI = {
-    'lgbm':1,
-    'xgboost':1.6,
-    'xgboost_nb':1.6,
-    'rf':2,
-    'lrl1':160,
-    'lrl2':25,
-    'linear_svc':16,
-    'kneighbor':30,
-    'catboost':15,
-    'extra_tree':1.9,
-    'nn':50,
-}
+'''!
+ * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License.
+'''
+
+N_SPLITS = 5
+RANDOM_SEED = 1
+SPLIT_RATIO = 0.1
+MEM_THRES = 4*(1024**3)
+SMALL_LARGE_THRES = 10000000
+MIN_SAMPLE_TRAIN = 10000
+CV_HOLDOUT_THRESHOLD = 100000
+SAMPLE_MULTIPLY_FACTOR = 4
@@ -1,5 +1,5 @@
 '''!
- * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
+ * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
 
@@ -122,7 +122,6 @@ def get_output_from_log(filename, time_budget):
         A list of the estimator, sample size and config of each logged iter
         logged_metric_list: A list of the logged metric of each logged iter
     '''
-    import ast
 
     best_config = None
     best_learner = None
@@ -169,13 +168,13 @@ def concat(X1, X2):
     '''concatenate two matrices vertically
     '''
     if isinstance(X1, pd.DataFrame) or isinstance(X1, pd.Series):
-        df = pd.concat([X1, X2], sort=False)
-        df.reset_index(drop=True, inplace=True)
         if isinstance(X1, pd.DataFrame):
             cat_columns = X1.select_dtypes(
                 include='category').columns
-            if len(cat_columns):
-                df[cat_columns] = df[cat_columns].astype('category')
+        df = pd.concat([X1, X2], sort=False)
+        df.reset_index(drop=True, inplace=True)
+        if isinstance(X1, pd.DataFrame) and len(cat_columns):
+            df[cat_columns] = df[cat_columns].astype('category')
         return df
     if issparse(X1):
         return vstack((X1, X2))
@@ -187,7 +186,8 @@ class DataTransformer:
     '''transform X, y
     '''
 
-    def fit_transform(self, X, y, objective):
+
+    def fit_transform(self, X, y, task):
         if isinstance(X, pd.DataFrame):
             X = X.copy()
             n = X.shape[0]
@@ -224,9 +224,9 @@ class DataTransformer:
                 SimpleImputer(missing_values=np.nan, strategy='median'),
                 num_columns)])
             X[num_columns] = self.transformer.fit_transform(X)
-        self.cat_columns, self.num_columns = cat_columns, num_columns
+        self._cat_columns, self._num_columns = cat_columns, num_columns
 
-        if objective == 'regression':
+        if task == 'regression':
             self.label_transformer = None
         else:
             from sklearn.preprocessing import LabelEncoder
@@ -236,7 +236,7 @@ class DataTransformer:
 
     def transform(self, X):
         if isinstance(X, pd.DataFrame):
-            cat_columns, num_columns = self.cat_columns, self.num_columns
+            cat_columns, num_columns = self._cat_columns, self._num_columns
             X = X[cat_columns + num_columns].copy()
             for column in cat_columns:
                 # print(column, X[column].dtype.name)
flaml/ml.py (517 lines changed)
@@ -1,244 +1,273 @@
The file is rewritten in full. Old version (removed):

'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''

from .model import *
import time
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
    accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
    f1_score
import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold


def get_estimator_class(objective_name, estimator_name):
    ''' when adding a new learner, need to add an elif branch '''

    if 'xgboost' in estimator_name:
        if 'regression' in objective_name:
            estimator_class = XGBoostEstimator
        else:
            estimator_class = XGBoostSklearnEstimator
    elif 'rf' in estimator_name:
        estimator_class = RandomForestEstimator
    elif 'lgbm' in estimator_name:
        estimator_class = LGBMEstimator
    elif 'lrl1' in estimator_name:
        estimator_class = LRL1Classifier
    elif 'lrl2' in estimator_name:
        estimator_class = LRL2Classifier
    elif 'catboost' in estimator_name:
        estimator_class = CatBoostEstimator
    elif 'extra_tree' in estimator_name:
        estimator_class = ExtraTreeEstimator
    elif 'kneighbor' in estimator_name:
        estimator_class = KNeighborsEstimator
    else:
        raise ValueError(estimator_name + ' is not a built-in learner. '
                         'Please use AutoML.add_learner() to add a customized learner.')
    return estimator_class


def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None):
    '''Loss using the specified metric

    Args:
        metric_name: A string of the metric name, one of
            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
            'f1', 'ap'
        y_predict: A 1d or 2d numpy array of the predictions which can be
            used to calculate the metric. E.g., 2d for log_loss and 1d
            for others.
        y_true: A 1d numpy array of the true labels
        labels: A 1d numpy array of the unique labels

    Returns:
        score: A float number of the loss, the lower the better
    '''
    metric_name = metric_name.lower()
    if 'r2' in metric_name:
        score = 1.0 - r2_score(y_true, y_predict)
    elif metric_name == 'rmse':
        score = np.sqrt(mean_squared_error(y_true, y_predict))
    elif metric_name == 'mae':
        score = mean_absolute_error(y_true, y_predict)
    elif metric_name == 'mse':
        score = mean_squared_error(y_true, y_predict)
    elif metric_name == 'accuracy':
        score = 1.0 - accuracy_score(y_true, y_predict)
    elif 'roc_auc' in metric_name:
        score = 1.0 - roc_auc_score(y_true, y_predict)
    elif 'log_loss' in metric_name:
        score = log_loss(y_true, y_predict, labels=labels)
    elif 'f1' in metric_name:
        score = 1 - f1_score(y_true, y_predict)
    elif 'ap' in metric_name:
        score = 1 - average_precision_score(y_true, y_predict)
    else:
        raise ValueError(metric_name + ' is not a built-in metric, '
                         'currently built-in metrics are: '
                         'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
                         'please pass a customized metric function to AutoML.fit(metric=func)')
    return score


def get_y_pred(estimator, X, eval_metric, obj):
    if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
        y_pred_classes = estimator.predict_proba(X)
        y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
    elif eval_metric in ['log_loss', 'roc_auc']:
        y_pred = estimator.predict_proba(X)
    else:
        try:
            y_pred = estimator.predict(X)
        except:
            y_pred = np.ones(X.shape[0])
    return y_pred


def get_test_loss(estimator, X_train, y_train, X_test, y_test, eval_metric, obj,
                  labels=None, budget=None, train_loss=False):
    start = time.time()
    train_time = estimator.fit(X_train, y_train, budget)
    if isinstance(eval_metric, str):
        test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
        test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
                                              labels)
        if train_loss != False:
            test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
            train_loss = sklearn_metric_loss_score(eval_metric, test_pred_y,
                                                   y_train, labels)
    else:  # customized metric function
        test_loss, train_loss = eval_metric(
            X_test, y_test, estimator, labels, X_train, y_train)
    train_time = time.time() - start
    return test_loss, train_time, train_loss


def train_model(estimator, X_train, y_train, budget):
    train_time = estimator.fit(X_train, y_train, budget)
    return train_time


def evaluate_model(estimator, X_train, y_train, X_val, y_val, budget, kf,
                   objective_name, eval_method, eval_metric, best_val_loss,
                   train_loss=False):
    if 'holdout' in eval_method:
        val_loss, train_loss, train_time = evaluate_model_holdout(
            estimator, X_train, y_train, X_val, y_val, budget,
            objective_name, eval_metric, best_val_loss, train_loss=train_loss)
    else:
        val_loss, train_loss, train_time = evaluate_model_CV(
            estimator, X_train, y_train, budget, kf, objective_name,
            eval_metric, best_val_loss, train_loss=train_loss)
    return val_loss, train_loss, train_time


def evaluate_model_holdout(estimator, X_train, y_train, X_val, y_val, budget,
                           objective_name, eval_metric, best_val_loss,
                           train_loss=False):
    val_loss, train_time, train_loss = get_test_loss(
        estimator, X_train, y_train, X_val, y_val, eval_metric, objective_name,
        budget=budget, train_loss=train_loss)
    return val_loss, train_loss, train_time


def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
                      objective_name, eval_metric, best_val_loss,
                      train_loss=False):
    start_time = time.time()
    total_val_loss = total_train_loss = 0
    train_time = 0
    valid_fold_num = 0
    n = kf.get_n_splits()
    X_train_split, y_train_split = X_train_all, y_train_all
    if objective_name == 'regression':
        labels = None
    else:
        labels = np.unique(y_train_all)

    if isinstance(kf, RepeatedStratifiedKFold):
        kf = kf.split(X_train_split, y_train_split)
    else:
        kf = kf.split(X_train_split)
    rng = np.random.RandomState(2020)
    val_loss_list = []
    budget_per_train = budget / (n + 1)
    for train_index, val_index in kf:
        train_index = rng.permutation(train_index)
        if isinstance(X_train_all, pd.DataFrame):
            X_train, X_val = X_train_split.iloc[
                train_index], X_train_split.iloc[val_index]
        else:
            X_train, X_val = X_train_split[
                train_index], X_train_split[val_index]
        if isinstance(y_train_all, pd.Series):
            y_train, y_val = y_train_split.iloc[
                train_index], y_train_split.iloc[val_index]
        else:
            y_train, y_val = y_train_split[
                train_index], y_train_split[val_index]
        estimator.cleanup()
        val_loss_i, train_time_i, train_loss_i = get_test_loss(
            estimator, X_train, y_train, X_val, y_val, eval_metric,
            objective_name, labels, budget_per_train, train_loss=train_loss)
        valid_fold_num += 1
        total_val_loss += val_loss_i
        if train_loss != False:
            if total_train_loss != 0:
                total_train_loss += train_loss_i
            else:
                total_train_loss = train_loss_i
        train_time += train_time_i
        if valid_fold_num == n:
            val_loss_list.append(total_val_loss / valid_fold_num)
            total_val_loss = valid_fold_num = 0
        elif time.time() - start_time >= budget:
            val_loss_list.append(total_val_loss / valid_fold_num)
            break
    val_loss = np.max(val_loss_list)
    if train_loss != False:
        train_loss = total_train_loss / n
    budget -= time.time() - start_time
    if val_loss < best_val_loss and budget > budget_per_train:
        estimator.cleanup()
        train_time_full = estimator.fit(X_train_all, y_train_all, budget)
        train_time += train_time_full
    return val_loss, train_loss, train_time


def compute_estimator(X_train, y_train, X_val, y_val, budget, kf,
                      config_dic, objective_name, estimator_name, eval_method,
                      eval_metric, best_val_loss=np.Inf, n_jobs=1,
                      estimator_class=None, train_loss=False):
    start_time = time.time()
    estimator_class = estimator_class or get_estimator_class(
        objective_name, estimator_name)
    estimator = estimator_class(
        **config_dic, objective_name=objective_name, n_jobs=n_jobs)
    val_loss, train_loss, train_time = evaluate_model(
        estimator, X_train, y_train, X_val, y_val, budget, kf, objective_name,
        eval_method, eval_metric, best_val_loss, train_loss=train_loss)
    all_time = time.time() - start_time
    return estimator, val_loss, train_loss, train_time, all_time


def train_estimator(X_train, y_train, config_dic, objective_name,
                    estimator_name, n_jobs=1, estimator_class=None,
                    budget=None):
    start_time = time.time()
    estimator_class = estimator_class or get_estimator_class(
        objective_name, estimator_name)
    estimator = estimator_class(**config_dic, objective_name=objective_name,
                                n_jobs=n_jobs)
    if X_train is not None:
        train_time = train_model(estimator, X_train, y_train, budget)
    else:
        estimator = estimator.estimator_class(**estimator.params)
    train_time = time.time() - start_time
    return estimator, train_time


def get_classification_objective(num_labels: int) -> str:
    if num_labels == 2:
        objective_name = 'binary:logistic'
    else:
        objective_name = 'multi:softmax'
    return objective_name


New version (added):

'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''

from .model import *
import time
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
    accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
    f1_score
import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold

import logging
logger = logging.getLogger(__name__)


def get_estimator_class(task, estimator_name):
    ''' when adding a new learner, need to add an elif branch '''

    if 'xgboost' in estimator_name:
        if 'regression' in task:
            estimator_class = XGBoostEstimator
        else:
            estimator_class = XGBoostSklearnEstimator
    elif 'rf' in estimator_name:
        estimator_class = RandomForestEstimator
    elif 'lgbm' in estimator_name:
        estimator_class = LGBMEstimator
    elif 'lrl1' in estimator_name:
        estimator_class = LRL1Classifier
    elif 'lrl2' in estimator_name:
        estimator_class = LRL2Classifier
    elif 'catboost' in estimator_name:
        estimator_class = CatBoostEstimator
    elif 'extra_tree' in estimator_name:
        estimator_class = ExtraTreeEstimator
    elif 'kneighbor' in estimator_name:
        estimator_class = KNeighborsEstimator
    else:
        raise ValueError(estimator_name + ' is not a built-in learner. '
                         'Please use AutoML.add_learner() to add a customized learner.')
    return estimator_class


def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None,
                              sample_weight=None):
    '''Loss using the specified metric

    Args:
        metric_name: A string of the metric name, one of
            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
            'f1', 'ap'
        y_predict: A 1d or 2d numpy array of the predictions which can be
            used to calculate the metric. E.g., 2d for log_loss and 1d
            for others.
        y_true: A 1d numpy array of the true labels
        labels: A 1d numpy array of the unique labels
        sample_weight: A 1d numpy array of the sample weight

    Returns:
        score: A float number of the loss, the lower the better
    '''
    metric_name = metric_name.lower()
    if 'r2' in metric_name:
        score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
    elif metric_name == 'rmse':
        score = np.sqrt(mean_squared_error(y_true, y_predict,
                                           sample_weight=sample_weight))
    elif metric_name == 'mae':
        score = mean_absolute_error(y_true, y_predict,
                                    sample_weight=sample_weight)
    elif metric_name == 'mse':
        score = mean_squared_error(y_true, y_predict,
                                   sample_weight=sample_weight)
    elif metric_name == 'accuracy':
        score = 1.0 - accuracy_score(y_true, y_predict,
                                     sample_weight=sample_weight)
    elif 'roc_auc' in metric_name:
        score = 1.0 - roc_auc_score(y_true, y_predict,
                                    sample_weight=sample_weight)
    elif 'log_loss' in metric_name:
        score = log_loss(y_true, y_predict, labels=labels,
                         sample_weight=sample_weight)
    elif 'f1' in metric_name:
        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
    elif 'ap' in metric_name:
        score = 1 - average_precision_score(y_true, y_predict,
                                            sample_weight=sample_weight)
    else:
        raise ValueError(metric_name + ' is not a built-in metric, '
                         'currently built-in metrics are: '
                         'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
                         'please pass a customized metric function to AutoML.fit(metric=func)')
    return score


def get_y_pred(estimator, X, eval_metric, obj):
    if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
        y_pred_classes = estimator.predict_proba(X)
        y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
    elif eval_metric in ['log_loss', 'roc_auc']:
        y_pred = estimator.predict_proba(X)
    else:
        try:
            y_pred = estimator.predict(X)
        except:
            logger.debug("prediction failed. Using a constant predictor.")
            y_pred = np.ones(X.shape[0])
    return y_pred


def get_test_loss(estimator, X_train, y_train, X_test, y_test, weight_test,
                  eval_metric, obj, labels=None, budget=None, train_loss=False,
                  fit_kwargs={}):
    start = time.time()
    train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
    if isinstance(eval_metric, str):
        test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
        test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
                                              labels, weight_test)
        if train_loss != False:
            test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
            train_loss = sklearn_metric_loss_score(
                eval_metric, test_pred_y, y_train, labels,
                fit_kwargs.get('sample_weight'))
    else:  # customized metric function
        test_loss, train_loss = eval_metric(
            X_test, y_test, estimator, labels, X_train, y_train,
            weight_test, fit_kwargs.get('sample_weight'))
    train_time = time.time() - start
    return test_loss, train_time, train_loss


def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
    train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
    return train_time


def evaluate_model(estimator, X_train, y_train, X_val, y_val, weight_val,
                   budget, kf, task, eval_method, eval_metric, best_val_loss,
                   train_loss=False, fit_kwargs={}):
    if 'holdout' in eval_method:
        val_loss, train_loss, train_time = evaluate_model_holdout(
            estimator, X_train, y_train, X_val, y_val, weight_val, budget,
            task, eval_metric, best_val_loss, train_loss=train_loss,
            fit_kwargs=fit_kwargs)
    else:
        val_loss, train_loss, train_time = evaluate_model_CV(
            estimator, X_train, y_train, budget, kf, task,
            eval_metric, best_val_loss, train_loss=train_loss,
            fit_kwargs=fit_kwargs)
    return val_loss, train_loss, train_time


def evaluate_model_holdout(estimator, X_train, y_train, X_val, y_val,
                           weight_val, budget, task, eval_metric,
                           best_val_loss, train_loss=False, fit_kwargs={}):
    val_loss, train_time, train_loss = get_test_loss(
        estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
        task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
    return val_loss, train_loss, train_time


def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
                      task, eval_metric, best_val_loss, train_loss=False,
                      fit_kwargs={}):
    start_time = time.time()
    total_val_loss = total_train_loss = 0
    train_time = 0
    valid_fold_num = 0
    n = kf.get_n_splits()
    X_train_split, y_train_split = X_train_all, y_train_all
    if task == 'regression':
        labels = None
    else:
        labels = np.unique(y_train_all)

    if isinstance(kf, RepeatedStratifiedKFold):
        kf = kf.split(X_train_split, y_train_split)
    else:
        kf = kf.split(X_train_split)
    rng = np.random.RandomState(2020)
    val_loss_list = []
    budget_per_train = budget / (n + 1)
    if 'sample_weight' in fit_kwargs:
        weight = fit_kwargs['sample_weight']
        weight_val = None
    else:
        weight = weight_val = None
    for train_index, val_index in kf:
        train_index = rng.permutation(train_index)
        if isinstance(X_train_all, pd.DataFrame):
            X_train, X_val = X_train_split.iloc[
                train_index], X_train_split.iloc[val_index]
        else:
            X_train, X_val = X_train_split[
                train_index], X_train_split[val_index]
        if isinstance(y_train_all, pd.Series):
            y_train, y_val = y_train_split.iloc[
                train_index], y_train_split.iloc[val_index]
        else:
            y_train, y_val = y_train_split[
                train_index], y_train_split[val_index]
        estimator.cleanup()
        if weight is not None:
            fit_kwargs['sample_weight'], weight_val = weight[
                train_index], weight[val_index]
        val_loss_i, train_time_i, train_loss_i = get_test_loss(
            estimator, X_train, y_train, X_val, y_val, weight_val,
            eval_metric, task, labels, budget_per_train,
            train_loss=train_loss, fit_kwargs=fit_kwargs)
        if weight is not None:
            fit_kwargs['sample_weight'] = weight
        valid_fold_num += 1
        total_val_loss += val_loss_i
        if train_loss != False:
            if total_train_loss != 0:
                total_train_loss += train_loss_i
            else:
                total_train_loss = train_loss_i
        train_time += train_time_i
        if valid_fold_num == n:
            val_loss_list.append(total_val_loss / valid_fold_num)
            total_val_loss = valid_fold_num = 0
        elif time.time() - start_time >= budget:
            val_loss_list.append(total_val_loss / valid_fold_num)
            break
    val_loss = np.max(val_loss_list)
    if train_loss != False:
        train_loss = total_train_loss / n
    budget -= time.time() - start_time
    if val_loss < best_val_loss and budget > budget_per_train:
        estimator.cleanup()
        estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
    return val_loss, train_loss, train_time


def compute_estimator(X_train, y_train, X_val, y_val, weight_val, budget, kf,
                      config_dic, task, estimator_name, eval_method,
                      eval_metric, best_val_loss=np.Inf, n_jobs=1,
                      estimator_class=None, train_loss=False, fit_kwargs={}):
    start_time = time.time()
    estimator_class = estimator_class or get_estimator_class(
        task, estimator_name)
    estimator = estimator_class(
        **config_dic, task=task, n_jobs=n_jobs)
    val_loss, train_loss, train_time = evaluate_model(
        estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf,
        task, eval_method, eval_metric, best_val_loss, train_loss=train_loss,
        fit_kwargs=fit_kwargs)
    all_time = time.time() - start_time
    return estimator, val_loss, train_loss, train_time, all_time


def train_estimator(X_train, y_train, config_dic, task,
                    estimator_name, n_jobs=1, estimator_class=None,
                    budget=None, fit_kwargs={}):
    start_time = time.time()
    estimator_class = estimator_class or get_estimator_class(
        task, estimator_name)
    estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
    if X_train is not None:
        train_time = train_model(estimator, X_train, y_train, budget,
                                 fit_kwargs)
    else:
        estimator = estimator.estimator_class(**estimator.params)
    train_time = time.time() - start_time
    return estimator, train_time


def get_classification_objective(num_labels: int) -> str:
    if num_labels == 2:
        objective_name = 'binary:logistic'
    else:
        objective_name = 'multi:softmax'
    return objective_name
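The updated sklearn_metric_loss_score above can be exercised on its own; this quick check uses the signature exactly as added in this diff, with toy arrays as placeholders for real predictions.

```python
import numpy as np
from flaml.ml import sklearn_metric_loss_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
w = np.array([1.0, 2.0, 0.5, 1.0])  # illustrative per-sample weights

# Returns 1 - weighted accuracy; lower is better.
print(sklearn_metric_loss_score('accuracy', y_pred, y_true, sample_weight=w))
```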
flaml/model.py (1262 lines changed): file diff suppressed because it is too large.
675
flaml/search.py
675
flaml/search.py
|
@ -1,675 +0,0 @@
|
|||
'''!
|
||||
* Copyright (c) 2020 Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License.
|
||||
'''
|
||||
|
||||
from functools import partial
|
||||
from .ml import train_estimator
|
||||
import time
|
||||
import math
|
||||
import numpy as np
|
||||
from .space import config_space, estimator_size, get_config_values, \
|
||||
generate_config_ini, generate_config_max, generate_config_min
|
||||
from .config import SPLIT_RATIO, MIN_SAMPLE_TRAIN, \
|
||||
HISTORY_SIZE, MEM_THRES, BASE_Const, BASE_LOWER_BOUND
|
||||
from random import gauss
|
||||
|
||||
|
||||
def rand_vector_unit_sphere(dims):
|
||||
vec = [gauss(0, 1) for i in range(dims)]
|
||||
mag = sum(x**2 for x in vec) ** .5
|
||||
return [x / mag for x in vec]
|
||||
|
||||
|
||||
def rand_vector_gaussian(dims):
|
||||
vec = [gauss(0, 1) for i in range(dims)]
|
||||
return vec
|
||||
|
||||
|
||||
class ParamSearch:
|
||||
'''
|
||||
the class for searching params for 1 learner
|
||||
'''
|
||||
|
||||
def __init__(self, estimator, data_size,
|
||||
compute_with_config, train_with_config, save_info_helper=None,
|
||||
init_sample_size=MIN_SAMPLE_TRAIN, objective_name='regression',
|
||||
log_type='better', config_space_info=None, size_estimator=None,
|
||||
split_ratio=SPLIT_RATIO, base_change='sqrtK', use_dual_dir=True,
|
||||
move_type='geo'):
|
||||
self.log_type = log_type
|
||||
self.base_change = base_change
|
||||
if init_sample_size > data_size:
|
||||
init_sample_size = data_size
|
||||
self.next_sample_size = {}
|
||||
self.prev_sample_size = {}
|
||||
s = init_sample_size
|
||||
self.prev_sample_size[s] = s
|
||||
self.estimator_configspace = config_space_info or config_space(
|
||||
estimator, data_size, objective_name)
|
||||
self.get_size_for_config = size_estimator or (
|
||||
lambda x: estimator_size(x, estimator))
|
||||
config_min_dic_primary, config_min_dic_more, config_min_dic = \
|
||||
generate_config_min(estimator, self.estimator_configspace, None)
|
||||
self.min_config_primary = np.array(
|
||||
list(config_min_dic_primary.values()))
|
||||
self.min_config_more = np.array(list(config_min_dic_more.values()))
|
||||
self.min_config = np.array(list(config_min_dic.values()))
|
||||
# init configurations for different sample size
|
||||
config_init_dic_primary, config_init_dic_more, _, config_type_dic = \
|
||||
generate_config_ini(estimator, self.estimator_configspace)
|
||||
self.init_config_dic_primary = {s: config_init_dic_primary}
|
||||
self.init_config_dic_more = {s: config_init_dic_more}
|
||||
self.init_config_dic_type_dic = {'primary': {
|
||||
s: config_init_dic_primary}, 'more': {s: config_init_dic_more}}
|
||||
self.init_config_dic = {
|
||||
**self.init_config_dic_type_dic['primary'],
|
||||
**self.init_config_dic_type_dic['more']
|
||||
}
|
||||
self.config_type_dic = config_type_dic
|
||||
# max configurations for different sample size
|
||||
config_max_dic_primary, config_max_dic_more, config_max_dic = \
|
||||
generate_config_max(
|
||||
estimator, self.estimator_configspace, int(s))
|
||||
self.max_config_dic_primary = {s: np.array(
|
||||
list(config_max_dic_primary.values()))}
|
||||
self.max_config_dic_more = {s: np.array(
|
||||
list(config_max_dic_more.values()))}
|
||||
self.max_config_dic = {s: np.array(list(config_max_dic.values()))}
|
||||
self.dims = (len(self.min_config_primary), len(self.min_config_more))
|
||||
# print(self.dims)
|
||||
if self.dims[1] > 0 and self.dims[0] > 0:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i] / self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
),
|
||||
max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1]))
|
||||
)
|
||||
}
|
||||
elif self.dims[0] > 0:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i] / self.min_config_primary[i])
|
||||
** (math.sqrt(self.dims[0])) for i in range(self.dims[0])
|
||||
)
|
||||
}
|
||||
else:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** (math.sqrt(self.dims[1])) for i in range(self.dims[1])
|
||||
)
|
||||
}
|
||||
|
||||
# create sample size sequence
|
||||
while s < data_size:
|
||||
s2 = self.next_sample_size[s] = s * 2 if s * 2 <= data_size else data_size
|
||||
self.prev_sample_size[s2] = s
|
||||
s = s2
|
||||
|
||||
config_max_dic_primary, config_max_dic_more, config_max_dic = \
|
||||
generate_config_max(
|
||||
estimator, self.estimator_configspace, int(s))
|
||||
self.max_config_dic_primary[s] = np.array(
|
||||
list(config_max_dic_primary.values()))
|
||||
self.max_config_dic_more[s] = np.array(
|
||||
list(config_max_dic_more.values()))
|
||||
self.max_config_dic[s] = np.array(list(config_max_dic.values()))
|
||||
if self.dims[1] > 0 and self.dims[0] > 0:
|
||||
self.base_upper_bound[s] = max(
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i]
|
||||
/ self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
),
|
||||
max(
|
||||
(self.max_config_dic_more[s][i]
|
||||
/ self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1])
|
||||
)
|
||||
)
|
||||
elif self.dims[0] > 0:
|
||||
self.base_upper_bound[s] = max(
|
||||
(self.max_config_dic_primary[s][i]
|
||||
/ self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
)
|
||||
else:
|
||||
self.base_upper_bound[s] = max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1])
|
||||
)
|
||||
|
||||
self.init_sample_size = init_sample_size
|
||||
self.data_size = data_size
|
||||
self.sample_size_full = int(self.data_size / (1.0 - split_ratio))
|
||||
|
||||
self.compute_with_config = compute_with_config
|
||||
self.estimator = estimator
|
||||
|
||||
# for logging
|
||||
self.save_helper = save_info_helper
|
||||
self.estimator_type_list = ['primary', 'more']
|
||||
self.dim = self.dims[0] if self.dims[0] > 0 else self.dims[1]
|
||||
self.b = BASE_Const**(math.sqrt(self.dim))
|
||||
self.base_ini = self.b
|
||||
self.total_dim = sum(self.dims)
|
||||
|
||||
self.epo = 2**(self.dim - 1)
|
||||
# keys are [sample size, config], values are (loss, train_time)
|
||||
self.config_tried = {}
|
||||
self.train_with_config = train_with_config
|
||||
|
||||
self.current_config_loss = None
|
||||
self.use_dual_dir = use_dual_dir
|
||||
self.move_type = move_type
|
||||
|
||||
def evaluate_config(self, config, sample_size, move='_pos'):
|
||||
'''
|
||||
evaluate a configuration, update search state,
|
||||
and return whether the state is changed
|
||||
'''
|
||||
if self.time_from_start >= self.time_budget or move != '_ini' and \
|
||||
self.train_time > self.time_budget - self.time_from_start:
|
||||
return False
|
||||
|
||||
model, val_loss, new_train_time, from_history, train_loss = \
|
||||
self.evaluate_proposed_config(config, sample_size, move)
|
||||
# update current config
|
||||
self.update_current_config(config, val_loss, sample_size)
|
||||
# update best model statistics, including statistics about loss and time
|
||||
improved = self.update_search_state_best(
|
||||
config, sample_size, model, val_loss, new_train_time, from_history)
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
if self.save_helper is not None:
|
||||
if from_history:
|
||||
move = move + '_from_hist'
|
||||
self.save_helper.append(self.model_count,
|
||||
train_loss,
|
||||
new_train_time,
|
||||
self.time_from_start,
|
||||
val_loss,
|
||||
config,
|
||||
self.best_loss,
|
||||
self.best_config[0],
|
||||
self.estimator,
|
||||
sample_size)
|
||||
return improved
|
||||
|
||||
def get_hist_config_sig(self, sample_size, config):
|
||||
config_values = get_config_values(config, self.config_type_dic)
|
||||
config_sig = str(sample_size) + '_' + str(config_values)
|
||||
return config_sig
|
||||
|
||||
def evaluate_proposed_config(self, config, sample_size, move):
|
||||
self.model_count += 1
|
||||
config_sig = self.get_hist_config_sig(sample_size, config)
|
||||
d = self.total_dim
|
||||
history_size_per_d = len(self.config_tried) / float(d)
|
||||
if config_sig in self.config_tried:
|
||||
val_loss, new_train_time = self.config_tried[config_sig]
|
||||
# print(config_sig,'found in history')
|
||||
model = train_loss = None
|
||||
from_history = True
|
||||
else:
|
||||
model, val_loss, train_loss, new_train_time, _ = \
|
||||
self.compute_with_config(self.estimator, config, sample_size)
|
||||
from_history = False
|
||||
if history_size_per_d < HISTORY_SIZE:
|
||||
self.config_tried[config_sig] = (val_loss, new_train_time)
|
||||
|
||||
if self.first_move:
|
||||
self.init_config_dic[sample_size] = config
|
||||
move = '_ini'
|
||||
self.base = self.base_ini
|
||||
self.num_noimprovement = 0
|
||||
move = str(self.estimator) + move
|
||||
return model, val_loss, new_train_time, from_history, train_loss
|
||||
|
||||
def update_current_config(self, config, val_loss, sample_size):
|
||||
if self.first_move or val_loss < self.current_config_loss:
|
||||
self.first_move = False
|
||||
# update current config and coressponding sample_size
|
||||
self.sample_size = sample_size
|
||||
self.config = config
|
||||
self.config_primary = {x: config[x]
|
||||
for x in self.config_primary.keys()}
|
||||
try:
|
||||
self.config_more = {x: config[x]
|
||||
for x in self.config_more.keys()}
|
||||
except:
|
||||
self.config_more = {}
|
||||
self.current_config_loss = val_loss
|
||||
|
||||
def update_reset_best_config_loss(self, sample_size, config, val_loss):
|
||||
if sample_size == self.data_size:
|
||||
if self.best_config_loss_dic_full_reset[1] is None:
|
||||
self.best_config_loss_dic_full_reset = [
|
||||
config, val_loss, self.model_count]
|
||||
else:
|
||||
full_reset_best_loss = self.best_config_loss_dic_full_reset[1]
|
||||
if val_loss < full_reset_best_loss:
|
||||
self.best_config_loss_dic_full_reset = [
|
||||
config, full_reset_best_loss, self.model_count]
|
||||
|
||||
def update_search_state_best(self, config, sample_size, model, val_loss,
|
||||
new_train_time, from_history):
|
||||
# upate the loss statistics for a particular sample size
|
||||
if sample_size not in self.best_config_loss_samplesize_dic:
|
||||
self.best_config_loss_samplesize_dic[sample_size] = [
|
||||
config, val_loss, self.model_count]
|
||||
else:
|
||||
s_best_loss = self.best_config_loss_samplesize_dic[sample_size][1]
|
||||
if val_loss < s_best_loss:
|
||||
self.best_config_loss_samplesize_dic[sample_size] = [
|
||||
config, val_loss, self.model_count]
|
||||
|
||||
self.update_reset_best_config_loss(sample_size, config, val_loss)
|
||||
|
||||
# update best model statistics, including statistics about loss and time
|
||||
if val_loss < self.new_loss:
|
||||
self.old_loss = self.new_loss if self.new_loss < float(
|
||||
'inf') else 2 * val_loss
|
||||
self.new_loss = val_loss
|
||||
self.old_loss_time = self.new_loss_time
|
||||
self.old_train_time = self.train_time
|
||||
self.new_loss_time = self.train_time = new_train_time
|
||||
if val_loss < self.best_loss:
|
||||
self.best_config = [self.config, self.model_count]
|
||||
if not from_history:
|
||||
self.trained_estimator = model
|
||||
# print(model)
|
||||
else:
|
||||
print(val_loss, self.best_loss)
|
||||
self.best_loss = val_loss
|
||||
self.time_best_found = self.time_from_start
|
||||
return True
|
||||
else:
|
||||
if not from_history:
|
||||
self.new_loss_time += new_train_time
|
||||
return False
|
||||
|
||||
def get_proposal(self, current_config, rand_vector_func, base, move_type):
|
||||
rand_vector = rand_vector_func(len(current_config))
|
||||
rand_vector = [i for i in rand_vector]
|
||||
rand_vector_neg = [-i for i in rand_vector]
|
||||
|
||||
move_vector = {}
|
||||
move_vector_neg = {}
|
||||
|
||||
index_ = 0
|
||||
for k, v in current_config.items():
|
||||
if 'geo' in move_type:
|
||||
# get the move vector using the proposed random vector
|
||||
move_vector[k] = v * (base**(rand_vector[index_]))
|
||||
move_vector_neg[k] = v * (base**(rand_vector_neg[index_]))
|
||||
else:
|
||||
move_vector[k] = v + (base * (rand_vector[index_]))
|
||||
move_vector_neg[k] = v + (base * (rand_vector_neg[index_]))
|
||||
index_ += 1
|
||||
|
||||
# as long as one of the proposed model (+ or -) is within the mem_limit
|
||||
# we will proceed
|
||||
if not self.use_dual_dir:
|
||||
move_vector_neg = None
|
||||
return move_vector, move_vector_neg
|
||||
|
||||
def get_config_from_move_vector(self, v, estimator_type):
|
||||
if v != None:
|
||||
if 'all' in estimator_type:
|
||||
v = v
|
||||
elif 'primary' in estimator_type:
|
||||
v = {**v, **self.config_more}
|
||||
else:
|
||||
v = {**self.config_primary, **v}
|
||||
|
||||
bounded_v = self.get_v_within_min_max(v)
|
||||
else:
|
||||
bounded_v = None
|
||||
return bounded_v
|
||||
|
||||
def dual_direction_sample(self, base, current_search_config,
|
||||
estimator_type='primary', rand_vector_func=rand_vector_unit_sphere,
|
||||
mem_thres=MEM_THRES, move_type='geo'):
|
||||
current_config = current_search_config
|
||||
if len(current_config) == 0:
|
||||
return None, None
|
||||
bounded_v_list = [None, None]
|
||||
while not bounded_v_list[0] and not bounded_v_list[
|
||||
1] and self.time_from_start < self.time_budget:
|
||||
move_vector, move_vector_neg = self.get_proposal(
|
||||
current_config, rand_vector_func,
|
||||
base, move_type)
|
||||
bounded_v_list = [move_vector, move_vector_neg]
|
||||
for i, v in enumerate(bounded_v_list):
|
||||
bounded_v = self.get_config_from_move_vector(v, estimator_type)
|
||||
proposed_model_size = self.get_size_for_config(bounded_v)
|
||||
proposed_model_size = 0 if not isinstance(
|
||||
proposed_model_size, float) else proposed_model_size
|
||||
if proposed_model_size > mem_thres:
|
||||
# print(bounded_v, proposed_model_size, mem_thres)
|
||||
bounded_v = None
|
||||
bounded_v_list[i] = bounded_v
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
return bounded_v_list
|
||||
|
||||
def get_v_within_min_max(self, v):
|
||||
index_ = 0
|
||||
bounded_v = {}
|
||||
for key, value in v.items():
|
||||
new_value = min(max(
|
||||
value, self.min_config[index_]), self.max_config_dic[
|
||||
self.sample_size][index_])
|
||||
bounded_v[key] = new_value
|
||||
index_ += 1
|
||||
return bounded_v
|
||||
|
||||
def expected_time_improvement_search(self):
|
||||
return max(self.old_loss_time - self.old_train_time + self.train_time,
|
||||
self.new_loss_time)
|
||||
|
||||
def increase_sample_size(self):
|
||||
'''
|
||||
whether it's time to increase sample size
|
||||
'''
|
||||
expected_time_improvement_sample = 2 * self.train_time
|
||||
self.increase = self.sample_size < self.data_size and (
|
||||
self.estimator_type == 0 or self.dims[0] == 0) and (
|
||||
not self.improved
|
||||
or expected_time_improvement_sample
|
||||
< self.expected_time_improvement_search()
|
||||
)
|
||||
return self.increase
|
||||
|
||||
def search_begin(self, time_budget, start_time=None):
|
||||
self.time_budget = time_budget
|
||||
if not start_time:
|
||||
self.start_time = time.time()
|
||||
else:
|
||||
self.start_time = start_time
|
||||
# the time to train the last selected config
|
||||
self.old_train_time = self.train_time = 0
|
||||
self.time_from_start = 0
|
||||
# search states
|
||||
self.first_move = True
|
||||
self.improved = True
|
||||
self.estimator_type = 0 if self.dims[0] > 0 else 1
|
||||
|
||||
self.old_loss = self.new_loss = self.best_loss = float('+inf')
|
||||
# new_loss_time is the time from the beginning of training self.config to
|
||||
# now,
|
||||
# old_loss_time is the time from the beginning of training the old
|
||||
# self.config to the beginning of training self.config
|
||||
self.old_loss_time = self.new_loss_time = 0
|
||||
|
||||
self.trained_estimator = None
|
||||
self.model_count = 0
|
||||
self.K = 0
|
||||
self.old_modelcount = 0
|
||||
|
||||
# self.config has two parts: config_primary contain the configs
|
||||
# that are related with model complexity, config_more contains the
|
||||
# configs that is not related with model complexity
|
||||
self.config_primary = self.init_config_dic_primary[self.init_sample_size]
|
||||
self.config_more = self.init_config_dic_more[self.init_sample_size]
|
||||
self.config = {**self.config_primary, **self.config_more}
|
||||
self.best_config = [None, None]
|
||||
# key: sample size, value: [best_config, best_loss, model_count] under
|
||||
# sample size in the key
|
||||
self.best_config_loss_samplesize_dic = {
|
||||
self.init_sample_size: [self.config, self.old_loss, self.model_count]}
|
||||
# key: sample size, value: [best_config, best_loss, model_count] under
|
||||
# sample size in the key
|
||||
self.best_config_loss_dic_full_reset = [None, None, None]
|
||||
self.sample_size = self.init_sample_size
|
||||
self.base_change_bound = 1
|
||||
self.base_change_count = 0
|
||||
self.evaluate_config(self.config, self.sample_size, '_ini')
|
||||
self.increase = False
|
||||
|
||||
def train_config(self, config, sample_size):
|
||||
'''
|
||||
train a configuration
|
||||
'''
|
||||
# print('Evalute Config')
|
||||
if self.time_from_start >= self.time_budget:
|
||||
return False
|
||||
config_sig = self.get_hist_config_sig(sample_size, config)
|
||||
if not config_sig in self.config_tried:
|
||||
_, new_train_time = self.train_with_config(
|
||||
self.estimator, config, sample_size)
|
||||
train_loss, val_loss, move = None, self.new_loss, str(
|
||||
self.estimator) + '_trainAll'
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
if self.save_helper is not None:
|
||||
self.save_helper.append(self.model_count,
|
||||
train_loss,
|
||||
new_train_time,
|
||||
self.time_from_start,
|
||||
val_loss,
|
||||
config,
|
||||
self.best_loss,
|
||||
self.best_config,
|
||||
move,
|
||||
sample_size)
|
||||
self.config_tried[config_sig] = (val_loss, new_train_time)
|
||||
|
||||
def try_increase_sample_size(self):
|
||||
# print( self.estimator, self.sample_size)
|
||||
if self.sample_size in self.next_sample_size:
|
||||
if self.increase_sample_size():
|
||||
self.first_move = True
|
||||
self.improved = True
|
||||
self.estimator_type = 0 if self.dims[0] > 0 else 1
|
||||
self.evaluate_config(
|
||||
self.config, self.next_sample_size[self.sample_size])
|
||||
if not self.old_modelcount and self.sample_size == self.data_size:
|
||||
self.old_modelcount = self.model_count
|
||||
|
||||
def setup_current_search_config(self):
|
||||
estimator_type = self.estimator_type_list[self.estimator_type]
|
||||
if 'all' in estimator_type:
|
||||
current_search_config = self.config
|
||||
elif 'primary' in estimator_type:
|
||||
current_search_config = self.config_primary
|
||||
else:
|
||||
current_search_config = self.config_more
|
||||
# print(self.config_more)
|
||||
return estimator_type, current_search_config
|
||||
|
||||
def search1step(self, global_best_loss=float('+inf'),
|
||||
retrain_full=True, mem_thres=MEM_THRES, reset_type='init_gaussian'):
|
||||
# try to increase sample size
|
||||
self.try_increase_sample_size()
|
||||
# decide current_search_config according to estimator_type
|
||||
estimator_type, current_search_config = \
|
||||
self.setup_current_search_config()
|
||||
time_left = self.time_budget - self.time_from_start
|
||||
        if time_left < self.train_time:
            return False
        if retrain_full and self.train_time < time_left < 2 * self.train_time \
                and self.best_loss <= global_best_loss:
            self.train_config(self.best_config[0], self.sample_size_full)

        move_vector, move_vector_neg = self.dual_direction_sample(
            self.base, current_search_config, estimator_type,
            rand_vector_unit_sphere, mem_thres, self.move_type)
        if move_vector is None:
            if move_vector_neg is None:
                self.improved = False
            else:
                self.improved = self.evaluate_config(
                    move_vector_neg, self.sample_size, '_neg' + str(
                        estimator_type))
        else:
            self.improved = self.evaluate_config(
                move_vector, self.sample_size, '_pos' + str(estimator_type))
            if not self.improved:
                if move_vector_neg is None:
                    pass
                else:
                    self.improved = self.evaluate_config(
                        move_vector_neg, self.sample_size, '_neg' + str(
                            estimator_type))
        self.update_noimprovement_stat(
            global_best_loss, retrain_full, reset_type)
        return self.improved

    def update_noimprovement_stat(self, global_best_loss, retrain_full,
                                  reset_type):
        if self.improved:
            self.num_noimprovement = 0
        else:
            self.estimator_type = 1 - self.estimator_type
            if self.dims[self.estimator_type] == 0:
                self.estimator_type = 1 - self.estimator_type
            if self.estimator_type == 1 or self.dims[1] == 0:
                self.noimprovement(global_best_loss, retrain_full, reset_type)

    def noimprovement(self, global_best_loss, retrain_full, reset_type='org'):
        if self.sample_size == self.data_size:
            # Do not wait until full sample size to update num_noimprovement?
            self.num_noimprovement += 1
        if self.num_noimprovement >= self.epo:
            self.num_noimprovement = 0
            # print(self.num_noimprovement, self.epo)
            if self.base_change == 'squareroot':
                self.base = math.sqrt(self.base)
            else:
                if self.K == 0:  # first time
                    oldK = self.best_config_loss_dic_full_reset[2] - \
                        self.old_modelcount
                else:
                    oldK = self.K
                self.K = self.model_count + 1 - self.old_modelcount
                if self.base_change == 'K':
                    self.base **= oldK / self.K
                else:
                    self.base **= math.sqrt(oldK / self.K)
            if self.dims[1] > 0 and self.dims[0] > 0:
                base_lower_bound = min(
                    min(
                        (1.0 + self.estimator_configspace[i].min_change
                         / self.config_primary[i])
                        ** math.sqrt(self.dims[0])
                        for i in self.config_primary.keys()
                    ),
                    min(
                        (1.0 + self.estimator_configspace[i].min_change
                         / self.config_more[i])
                        ** math.sqrt(self.dims[1])
                        for i in self.config_more.keys()
                    )
                )
            elif self.dims[0] > 0:
                base_lower_bound = min(
                    (1.0 + self.estimator_configspace[i].min_change
                     / self.config_primary[i])
                    ** math.sqrt(self.dims[0])
                    for i in self.config_primary.keys()
                )
            else:
                base_lower_bound = min(
                    (1.0 + self.estimator_configspace[i].min_change
                     / self.config_more[i])
                    ** math.sqrt(self.dims[1])
                    for i in self.config_more.keys()
                )
            if np.isinf(base_lower_bound):
                base_lower_bound = BASE_LOWER_BOUND
            self.base_change_count += 1
            if self.base <= base_lower_bound or \
                    self.base_change_count == self.base_change_bound:
                if retrain_full and self.sample_size == self.data_size:
                    if self.best_loss <= global_best_loss:
                        # Only train on full data when the current estimator
                        # is the best estimator
                        # print('best estimator and train on full data')
                        self.train_config(
                            self.best_config[0], self.sample_size_full)
                # remaining time is more than enough for another trial
                if self.time_budget - self.time_from_start > self.train_time:
                    self.base_change_bound <<= 1
                    self.base_change_count = 0
                    self.K = 0
                    self.old_modelcount = self.model_count
                    self.best_config_loss_dic_full_reset = [None, None,
                                                            None]
                    self.first_move = True
                    self.improved = True
                    self.base_ini = min(
                        self.base_ini * 2, self.base_upper_bound[
                            self.sample_size])
                    self.estimator_type = 0 if self.dims[0] > 0 else 1
                    reset_config, reset_sample_size = self.get_reset_config(
                        self.init_sample_size, reset_type)
                    self.sample_size = reset_sample_size
                    # print('reset sample size', reset_sample_size)
                    self.evaluate_config(reset_config, self.sample_size,
                                         '_ini')

    def get_reset_config(self, sample_size, reset_type):
        init_config = self.init_config_dic[self.sample_size]
        reset_sample_size = sample_size
        if 'org' in reset_type:
            reset_config = init_config
        else:
            if 'init_gaussian' in reset_type:
                reset_config = init_config
                reset_sample_size = self.get_reset_sample_size(reset_config)
                config_values = get_config_values(
                    reset_config, self.config_type_dic)
                config_sig = str(reset_sample_size) + '_' + str(config_values)
                count = 0
                while config_sig in self.config_tried and \
                        self.time_from_start < self.time_budget \
                        and count < 1000:
                    # TODO: check exhaustiveness? use time as condition?
                    count += 1
                    move, move_neg = self.dual_direction_sample(
                        base=self.base, current_search_config=init_config,
                        estimator_type='all',
                        rand_vector_func=rand_vector_gaussian,
                        move_type=self.move_type)
                    if move:
                        reset_config = move
                    elif move_neg:
                        reset_config = move_neg
                    else:
                        continue
                    reset_sample_size = self.get_reset_sample_size(
                        reset_config)
                    config_values = get_config_values(
                        reset_config, self.config_type_dic)
                    config_sig = str(reset_sample_size) + \
                        '_' + str(config_values)
                    self.time_from_start = time.time() - self.start_time
            else:
                raise NotImplementedError
        return reset_config, reset_sample_size

    def get_reset_sample_size(self, reset_config):
        if not reset_config:
            print('reset_config is none')
        reset_config_size = self.get_size_for_config(reset_config)

        candidate_sample_size_list = []
        for sample_size, config_and_bestloss in \
                self.best_config_loss_samplesize_dic.items():
            s_best_config = config_and_bestloss[0]
            if not s_best_config:
                print('best config is none', sample_size)
            s_best_config_model_size = self.get_size_for_config(s_best_config)
            if s_best_config_model_size >= reset_config_size:
                candidate_sample_size_list.append(sample_size)

        if len(candidate_sample_size_list) != 0:
            return min(candidate_sample_size_list)
        else:
            return self.data_size
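The move logic above evaluates a randomly sampled direction and, if that fails to improve, its negation. A minimal sketch of that dual-direction idea (the helper below is hypothetical, not the `dual_direction_sample` implementation itself):

```python
import numpy as np

def dual_direction_move(config, step, rng=np.random.default_rng()):
    # Sample a random point on the unit sphere, then propose one config
    # in that direction and one in the exact opposite direction.
    keys = sorted(config)
    vec = rng.normal(size=len(keys))
    vec /= np.linalg.norm(vec)
    pos = {k: config[k] + step * v for k, v in zip(keys, vec)}
    neg = {k: config[k] - step * v for k, v in zip(keys, vec)}
    return pos, neg
```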
@@ -0,0 +1,2 @@
from .blendsearch import CFO, BlendSearch
from .flow2 import FLOW2
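Assuming this two-line module is the searcher package's `__init__.py` (e.g., `flaml/searcher/__init__.py`), the three searchers become importable from the package root:

```python
from flaml.searcher import BlendSearch, CFO, FLOW2  # assumed package path
```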
@@ -0,0 +1,419 @@
'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Dict, Optional, List, Tuple
import numpy as np
import time
import pickle
try:
    from ray.tune.suggest import Searcher
    from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
    from ray.tune.suggest.variant_generator import generate_variants
except ImportError:
    from .suggestion import Searcher, OptunaSearch as GlobalSearch
    from .variant_generator import generate_variants
from .search_thread import SearchThread
from .flow2 import FLOW2 as LocalSearch

import logging
logger = logging.getLogger(__name__)


class BlendSearch(Searcher):
    '''class for BlendSearch algorithm
    '''

    def __init__(self,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 space: Optional[dict] = None,
                 points_to_evaluate: Optional[List[Dict]] = None,
                 cat_hp_cost: Optional[dict] = None,
                 prune_attr: Optional[str] = None,
                 min_resource: Optional[float] = None,
                 max_resource: Optional[float] = None,
                 reduction_factor: Optional[float] = None,
                 resources_per_trial: Optional[dict] = None,
                 global_search_alg: Optional[Searcher] = None,
                 mem_size=None):
        '''Constructor

        Args:
            metric: A string of the metric name to optimize for.
            mode: A string in ['min', 'max'] to specify the objective as
                minimization or maximization.
            space: A dictionary to specify the search space.
            points_to_evaluate: Initial parameter suggestions to be run first.
                The first element needs to be a dictionary from a subset of
                controlled dimensions to the initial low-cost values.
                e.g.,

                .. code-block:: python

                    [{'epochs': 1}]

            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice.
                e.g.,

                .. code-block:: python

                    {'tree_method': [1, 1, 2]}

                i.e., the relative cost of the
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            prune_attr: A string of the attribute used for pruning.
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
                'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
            min_resource: A float of the minimal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            reduction_factor: A float of the reduction factor used for
                incremental pruning.
            resources_per_trial: A dictionary of the resources permitted per
                trial, such as 'mem'.
            global_search_alg: A Searcher instance as the global search
                instance. If omitted, Optuna is used. The following algos have
                known issues when used as global_search_alg:
                - HyperOptSearch raises exception sometimes
                - TuneBOHB has its own scheduler
            mem_size: A function to estimate the memory size for a given
                config.
        '''
        self._metric, self._mode = metric, mode
        if points_to_evaluate: init_config = points_to_evaluate[0]
        else: init_config = {}
        self._points_to_evaluate = points_to_evaluate
        if global_search_alg is not None:
            self._gs = global_search_alg
        elif getattr(self, '__name__', None) != 'CFO':
            self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
        else:
            self._gs = None
        self._ls = LocalSearch(init_config, metric, mode, cat_hp_cost, space,
                               prune_attr, min_resource, max_resource,
                               reduction_factor)
        self._resources_per_trial = resources_per_trial
        self._mem_size = mem_size
        self._mem_threshold = resources_per_trial.get(
            'mem') if resources_per_trial else None
        self._init_search()
    def set_search_properties(self,
                              metric: Optional[str] = None,
                              mode: Optional[str] = None,
                              config: Optional[Dict] = None) -> bool:
        if self._ls.space:
            if 'time_budget_s' in config:
                self._deadline = config.get('time_budget_s') + time.time()
            if 'metric_target' in config:
                self._metric_target = config.get('metric_target')
        else:
            self._metric, self._mode = metric, mode
            self._ls.set_search_properties(metric, mode, config)
            if self._gs is not None:  # CFO has no global search instance
                self._gs.set_search_properties(metric, mode, config)
            self._init_search()
        return True

    def _init_search(self):
        '''initialize the search
        '''
        self._metric_target = np.inf * self._ls.metric_op
        self._search_thread_pool = {
            # id: int -> thread: SearchThread
            0: SearchThread(self._ls.mode, self._gs)
        }
        self._thread_count = 1  # total # threads created
        self._init_used = self._ls.init_config is None
        self._trial_proposed_by = {}  # trial_id: str -> thread_id: int
        self._admissible_min = self._ls.normalize(self._ls.init_config)
        self._admissible_max = self._admissible_min.copy()
        self._result = {}  # config_signature: tuple -> result: Dict
        self._deadline = np.inf

    def save(self, checkpoint_path: str):
        save_object = (self._metric_target, self._search_thread_pool,
                       self._thread_count, self._init_used,
                       self._trial_proposed_by, self._admissible_min,
                       self._admissible_max, self._result, self._deadline)
        with open(checkpoint_path, "wb") as outputFile:
            pickle.dump(save_object, outputFile)

    def restore(self, checkpoint_path: str):
        with open(checkpoint_path, "rb") as inputFile:
            save_object = pickle.load(inputFile)
        self._metric_target, self._search_thread_pool, \
            self._thread_count, self._init_used, self._trial_proposed_by, \
            self._admissible_min, self._admissible_max, self._result, \
            self._deadline = save_object

    def restore_from_dir(self, checkpoint_dir: str):
        super().restore_from_dir(checkpoint_dir)
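The `save`/`restore` pair above pickles the whole searcher state, so an interrupted tuning run can resume. A hedged sketch of the round-trip (the path, space, and import path are illustrative):

```python
from ray import tune
from flaml.searcher import BlendSearch  # assumed package path

space = {'x': tune.uniform(0, 1)}
searcher = BlendSearch(metric='loss', mode='min', space=space)
searcher.save('blendsearch_ckpt.pkl')    # pickle the search state

resumed = BlendSearch(metric='loss', mode='min', space=space)
resumed.restore('blendsearch_ckpt.pkl')  # continue where we left off
```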
    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
                          error: bool = False):
        ''' search thread updater and cleaner
        '''
        thread_id = self._trial_proposed_by.get(trial_id)
        if thread_id in self._search_thread_pool:
            self._search_thread_pool[thread_id].on_trial_complete(
                trial_id, result, error)
            del self._trial_proposed_by[trial_id]
            # if not thread_id: logger.info(f"result {result}")
        if result:
            config = {}
            for key, value in result.items():
                if key.startswith('config/'):
                    config[key[7:]] = value
            if error:  # remove from result cache
                del self._result[self._ls.config_signature(config)]
            else:  # add to result cache
                self._result[self._ls.config_signature(config)] = result
                # update target metric if improved
                if (result[self._metric] - self._metric_target) \
                        * self._ls.metric_op < 0:
                    self._metric_target = result[self._metric]
                if thread_id:  # from local search
                    # update admissible region
                    normalized_config = self._ls.normalize(config)
                    for key in self._admissible_min:
                        value = normalized_config[key]
                        if value > self._admissible_max[key]:
                            self._admissible_max[key] = value
                        elif value < self._admissible_min[key]:
                            self._admissible_min[key] = value
                elif self._create_condition(result):
                    # thread creator
                    self._search_thread_pool[self._thread_count] = SearchThread(
                        self._ls.mode,
                        self._ls.create(config, result[self._metric],
                                        cost=result["time_total_s"])
                    )
                    thread_id = self._thread_count
                    self._thread_count += 1

        # cleaner
        # logger.info(f"thread {thread_id} in search thread pool="
        #             f"{thread_id in self._search_thread_pool}")
        if thread_id and thread_id in self._search_thread_pool:
            # local search thread
            self._clean(thread_id)

    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
        if len(self._search_thread_pool) < 2: return True
        obj_median = np.median([thread.obj_best1 for id, thread in
                                self._search_thread_pool.items() if id])
        return result[self._metric] * self._ls.metric_op < obj_median
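To make the thread-creation condition concrete, a quick numeric check (values are made up; objectives are already multiplied by `metric_op`, so lower is better):

```python
import numpy as np

obj_median = np.median([0.30, 0.25, 0.40])  # best objectives of local threads
new_result = 0.28                           # objective of a global-search trial
print(new_result < obj_median)  # True: spawn a new local search thread here
```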
    def _clean(self, thread_id: int):
        ''' delete thread and increase admissible region if converged,
        merge local threads if they are close
        '''
        assert thread_id
        todelete = set()
        for id in self._search_thread_pool:
            if id and id != thread_id:
                if self._inferior(id, thread_id):
                    todelete.add(id)
        for id in self._search_thread_pool:
            if id and id != thread_id:
                if self._inferior(thread_id, id):
                    todelete.add(thread_id)
                    break
        # logger.info(f"thread {thread_id}.converged="
        #             f"{self._search_thread_pool[thread_id].converged}")
        if self._search_thread_pool[thread_id].converged:
            todelete.add(thread_id)
            for key in self._admissible_min:
                self._admissible_max[key] += self._ls.STEPSIZE
                self._admissible_min[key] -= self._ls.STEPSIZE
        for id in todelete:
            del self._search_thread_pool[id]

    def _inferior(self, id1: int, id2: int) -> bool:
        ''' whether thread id1 is inferior to id2
        '''
        t1 = self._search_thread_pool[id1]
        t2 = self._search_thread_pool[id2]
        if t1.obj_best1 < t2.obj_best2: return False
        elif t1.resource and t1.resource < t2.resource: return False
        elif t2.reach(t1): return True
        else: return False

    def on_trial_result(self, trial_id: str, result: Dict):
        if trial_id not in self._trial_proposed_by: return
        thread_id = self._trial_proposed_by[trial_id]
        if thread_id not in self._search_thread_pool: return
        self._search_thread_pool[thread_id].on_trial_result(trial_id, result)

    def suggest(self, trial_id: str) -> Optional[Dict]:
        ''' choose thread, suggest a valid config
        '''
        if self._init_used and not self._points_to_evaluate:
            choice, backup = self._select_thread()
            # logger.debug(f"choice={choice}, backup={backup}")
            if choice < 0: return None  # timeout
            self._use_rs = False
            config = self._search_thread_pool[choice].suggest(trial_id)
            skip = self._should_skip(choice, trial_id, config)
            if skip:
                if choice:
                    # logger.info(f"skipping choice={choice}, config={config}")
                    return None
                # use rs
                self._use_rs = True
                for _, generated in generate_variants(
                        {'config': self._ls.space}):
                    config = generated['config']
                    break
                # logger.debug(f"random config {config}")
                skip = self._should_skip(choice, trial_id, config)
                if skip: return None
            # if not choice: logger.info(config)
            if choice or backup == choice or self._valid(config):
                # LS or valid or no backup choice
                self._trial_proposed_by[trial_id] = choice
            else:  # invalid config proposed by GS
                if not self._use_rs:
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)  # tell GS there is an error
                self._use_rs = False
                config = self._search_thread_pool[backup].suggest(trial_id)
                skip = self._should_skip(backup, trial_id, config)
                if skip:
                    return None
                self._trial_proposed_by[trial_id] = backup
                choice = backup
            # if choice: self._pending.add(choice)  # local search thread pending
            if not choice:
                if self._ls._resource:
                    # TODO: add resource to config proposed by GS, min or median?
                    config[self._ls.prune_attr] = self._ls.min_resource
            self._result[self._ls.config_signature(config)] = {}
        else:  # use init config
            init_config = self._points_to_evaluate.pop(
                0) if self._points_to_evaluate else self._ls.init_config
            if init_config == self._ls.init_config:
                config = self._ls.complete_config(init_config,
                                                  self._admissible_min,
                                                  self._admissible_max)
                # logger.info(f"reset config to {config}")
            else: config = init_config
            config_signature = self._ls.config_signature(config)
            result = self._result.get(config_signature)
            if result:  # tried before
                # self.on_trial_complete(trial_id, result)
                return None
            elif result is None:  # not tried before
                self._result[config_signature] = {}
            else: return None  # running but no result yet
            self._init_used = True
            self._trial_proposed_by[trial_id] = 0
        # logger.info(f"config={config}")
        return config
    def _should_skip(self, choice, trial_id, config) -> bool:
        ''' if config is None or config's result is known or above mem
        threshold, return True; o.w. return False
        '''
        if config is None: return True
        config_signature = self._ls.config_signature(config)
        exists = config_signature in self._result
        # check mem constraint
        if not exists and self._mem_threshold and self._mem_size(
                config) > self._mem_threshold:
            self._result[config_signature] = {
                self._metric: np.inf * self._ls.metric_op, 'time_total_s': 1}
            exists = True
        if exists:
            if not self._use_rs:
                result = self._result.get(config_signature)
                if result:
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, result, error=False)
                    if choice:
                        # local search thread
                        self._clean(choice)
                else:
                    # tell the thread there is an error
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)
            return True
        return False

    def _select_thread(self) -> Tuple:
        ''' thread selector; use can_suggest to check LS availability
        '''
        # update priority
        min_eci = self._deadline - time.time()
        if min_eci <= 0: return -1, -1
        max_speed = 0
        for thread in self._search_thread_pool.values():
            if thread.speed > max_speed: max_speed = thread.speed
        for thread in self._search_thread_pool.values():
            thread.update_eci(self._metric_target, max_speed)
            if thread.eci < min_eci: min_eci = thread.eci
        for thread in self._search_thread_pool.values():
            thread.update_priority(min_eci)

        top_thread_id = backup_thread_id = 0
        priority1 = priority2 = self._search_thread_pool[0].priority
        # logger.debug(f"priority of thread 0={priority1}")
        for thread_id, thread in self._search_thread_pool.items():
            # if thread_id:
            #     logger.debug(
            #         f"priority of thread {thread_id}={thread.priority}")
            #     logger.debug(
            #         f"thread {thread_id}.can_suggest={thread.can_suggest}")
            if thread_id and thread.can_suggest:
                priority = thread.priority
                if priority > priority1:
                    priority1 = priority
                    top_thread_id = thread_id
                if priority > priority2 or backup_thread_id == 0:
                    priority2 = priority
                    backup_thread_id = thread_id
        return top_thread_id, backup_thread_id

    def _valid(self, config: Dict) -> bool:
        ''' config validator
        '''
        for key in self._admissible_min:
            if key in config:
                value = config[key]
                # logger.info(
                #     f"{key},{value},{self._admissible_min[key]},"
                #     f"{self._admissible_max[key]}")
                if value < self._admissible_min[key] \
                        or value > self._admissible_max[key]:
                    return False
        return True
class CFO(BlendSearch):
    ''' class for CFO algorithm
    Number of threads is 1 or 2. Thread 0 is a vacuous thread.
    '''

    __name__ = 'CFO'

    def suggest(self, trial_id: str) -> Optional[Dict]:
        assert len(self._search_thread_pool) < 3, len(self._search_thread_pool)
        if len(self._search_thread_pool) < 2:
            # When the local search thread converges, the number of threads
            # drops to 1; need to restart from a new starting point.
            self._init_used = False
        return super().suggest(trial_id)

    def _select_thread(self) -> Tuple:
        for key in self._search_thread_pool:
            if key: return key, key

    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
        return len(self._search_thread_pool) < 2
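A minimal end-to-end sketch of driving BlendSearch through Ray Tune (the FLAML import path and the toy objective are assumptions; `tune.run`, `tune.uniform`, and `tune.report` are standard Ray Tune APIs):

```python
from ray import tune
from flaml.searcher import BlendSearch  # assumed package path

def objective(config):
    # toy quadratic; report the metric BlendSearch was told to minimize
    loss = (config['x'] - 0.3) ** 2 + (config['y'] - 0.7) ** 2
    tune.report(loss=loss)

space = {'x': tune.uniform(0, 1), 'y': tune.uniform(0, 1)}
algo = BlendSearch(metric='loss', mode='min', space=space,
                   points_to_evaluate=[{'x': 0.5}])  # low-cost initial point
analysis = tune.run(objective, config=space, search_alg=algo, num_samples=20)
print(analysis.best_config)
```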
@@ -0,0 +1,588 @@
'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Dict, Optional
import numpy as np
try:
    from ray.tune.suggest import Searcher
    from ray.tune.suggest.variant_generator import generate_variants
    from ray.tune import sample
except ImportError:
    from .suggestion import Searcher
    from .variant_generator import generate_variants
    from ..tune import sample


import logging
logger = logging.getLogger(__name__)


class FLOW2(Searcher):
    '''Local search algorithm FLOW2, with adaptive step size
    '''

    STEPSIZE = 0.1
    STEP_LOWER_BOUND = 0.0001
    cost_attr = 'time_total_s'

    def __init__(self,
                 init_config: dict,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 cat_hp_cost: Optional[dict] = None,
                 space: Optional[dict] = None,
                 prune_attr: Optional[str] = None,
                 min_resource: Optional[float] = None,
                 max_resource: Optional[float] = None,
                 resource_multiple_factor: Optional[float] = 4,
                 seed: Optional[int] = 20):
        '''Constructor

        Args:
            init_config: a dictionary from a subset of controlled dimensions
                to the initial low-cost values. e.g. {'epochs': 1}
            metric: A string of the metric name to optimize for.
            mode: A string in ['min', 'max'] to specify the objective as
                minimization or maximization.
            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice.
                e.g.,

                .. code-block:: python

                    {'tree_method': [1, 1, 2]}

                i.e., the relative cost of the
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            space: A dictionary to specify the search space.
            prune_attr: A string of the attribute used for pruning.
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
                'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
            min_resource: A float of the minimal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            resource_multiple_factor: A float of the multiplicative factor
                used for increasing resource.
            seed: An integer of the random seed.
        '''
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
        else:
            mode = "min"

        super(FLOW2, self).__init__(
            metric=metric,
            mode=mode)
        # internally minimizes, so "max" => -1
        if mode == "max":
            self.metric_op = -1.
        elif mode == "min":
            self.metric_op = 1.
        self.space = space or {}
        self._random = np.random.RandomState(seed)
        self._seed = seed
        if not init_config:
            logger.warning(
                "No init config given to FLOW2. Using random initial config."
                "For cost-frugal search, "
                "consider providing init values for cost-related hps via "
                "'init_config'."
            )
        self.init_config = self.best_config = init_config
        self.cat_hp_cost = cat_hp_cost
        self.prune_attr = prune_attr
        self.min_resource = min_resource
        self.resource_multiple_factor = resource_multiple_factor or 4
        self.max_resource = max_resource
        self._resource = None
        self._step_lb = np.Inf
        if space:
            self._init_search()
    def _init_search(self):
        self._tunable_keys = []
        self._bounded_keys = []
        # choices of numeric values. integer encoding.
        # value: (ordered list of choices,
        #         dict from choice to index in the ordered list)
        self._ordered_choice_hp = {}
        # choices with given cost. integer encoding.
        # value: (array of choices ordered by cost,
        #         dict from choice to index in the ordered array)
        self._ordered_cat_hp = {}
        # unordered choices. value: cardinality
        self._unordered_cat_hp = {}
        self._cat_hp_cost = {}
        for key, domain in self.space.items():
            assert not isinstance(domain, dict), \
                key + "'s domain is grid search which is not supported in FLOW2."
            if callable(getattr(domain, 'get_sampler', None)):
                self._tunable_keys.append(key)
                sampler = domain.get_sampler()
                if isinstance(sampler, sample.Quantized):
                    sampler_inner = sampler.get_sampler()
                    if str(sampler_inner) == 'Uniform':
                        self._step_lb = min(
                            self._step_lb,
                            sampler.q / (domain.upper - domain.lower))
                elif isinstance(domain, sample.Integer) and str(
                        sampler) == 'Uniform':
                    self._step_lb = min(
                        self._step_lb, 1.0 / (domain.upper - domain.lower))
                elif isinstance(domain, sample.Categorical):
                    cat_hp_cost = self.cat_hp_cost
                    if cat_hp_cost and key in cat_hp_cost:
                        cost = np.array(cat_hp_cost[key])
                        ind = np.argsort(cost)
                        l = np.array(domain.categories)[ind]
                        cost = self._cat_hp_cost[key] = cost[ind]
                        d = {}
                        for i, choice in enumerate(l):
                            d[choice] = i
                        self._ordered_cat_hp[key] = (l, d)
                        self._step_lb = min(self._step_lb, 1.0 / len(l))
                    elif all(isinstance(x, int) or isinstance(x, float)
                             for x in domain.categories):
                        l = sorted(domain.categories)
                        d = {}
                        for i, choice in enumerate(l):
                            d[choice] = i
                        self._ordered_choice_hp[key] = (l, d)
                        self._step_lb = min(self._step_lb, 1.0 / len(l))
                    else:
                        self._unordered_cat_hp[key] = l = len(domain.categories)
                        self._step_lb = min(self._step_lb, 1.0 / l)
                if str(sampler) != 'Normal':
                    self._bounded_keys.append(key)
        self._space_keys = list(self.space.keys())
        if (self.prune_attr and self.prune_attr not in self.space and
                self.max_resource):
            self._space_keys.append(self.prune_attr)
            self.min_resource = self.min_resource or self._min_resource()
            self._resource = self._round(self.min_resource)
            # logger.info(min_resource)
            # logger.info(max_resource)
            # logger.info(self._resource)
        else: self._resource = None
        self.incumbent = {}
        self.incumbent = self.normalize(self.init_config)
        self.best_obj = self.cost_incumbent = None
        self.dim = len(self._tunable_keys)  # total # tunable dimensions
        self._direction_tried = None
        self._num_complete4incumbent = self._cost_complete4incumbent = 0
        self._num_allowed4incumbent = 2 * self.dim
        self._proposed_by = {}  # trial_id: int -> incumbent: Dict
        self.step = self.STEPSIZE * np.sqrt(self.dim)
        lb = self.step_lower_bound
        if lb > self.step: self.step = lb * 2
        # upper bound
        self.step_ub = np.sqrt(self.dim)
        if self.step > self.step_ub: self.step = self.step_ub
        # maximal # consecutive no improvements
        self.dir = 2 ** self.dim
        self._configs = {}  # dict from trial_id to config
        self._K = 0
        self._iter_best_config = self.trial_count = 1
        self._reset_times = 0
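The step-size initialization at the end of `_init_search` can be traced with made-up numbers:

```python
import numpy as np

dim = 4                                # four tunable dimensions
step = 0.1 * np.sqrt(dim)              # STEPSIZE * sqrt(dim) = 0.2
step_ub = np.sqrt(dim)                 # upper bound = 2.0
step_lb = 0.05                         # suppose the space implies this bound
if step_lb > step:
    step = step_lb * 2                 # not triggered in this example
step = min(step, step_ub)              # final initial step = 0.2
```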
    @property
    def step_lower_bound(self) -> float:
        step_lb = self._step_lb
        for key in self._tunable_keys:
            domain = self.space[key]
            sampler = domain.get_sampler()
            if isinstance(sampler, sample.Quantized):
                sampler_inner = sampler.get_sampler()
                if str(sampler_inner) == 'LogUniform':
                    step_lb = min(
                        step_lb,
                        np.log(1.0 + sampler.q / self.best_config[key])
                        / np.log(domain.upper / domain.lower))
            elif isinstance(domain, sample.Integer) and str(
                    sampler) == 'LogUniform':
                step_lb = min(
                    step_lb,
                    np.log(1.0 + 1.0 / self.best_config[key])
                    / np.log(domain.upper / domain.lower))
        if np.isinf(step_lb): step_lb = self.STEP_LOWER_BOUND
        else: step_lb *= np.sqrt(self.dim)
        return step_lb
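For intuition, the per-dimension lower bound for a quantized log-uniform hyperparameter is the normalized size of one quantum at the incumbent value; a quick computation with made-up numbers:

```python
import numpy as np

q, lower, upper, best = 1, 1, 1024, 32   # quantum, range, incumbent value
per_dim_lb = np.log(1.0 + q / best) / np.log(upper / lower)
print(per_dim_lb)   # ~0.0044: one quantum spans ~0.44% of the log range
```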
    @property
    def resource(self) -> float:
        return self._resource

    def _min_resource(self) -> float:
        ''' automatically decide minimal resource
        '''
        return self.max_resource / np.power(self.resource_multiple_factor, 5)

    def _round(self, resource) -> float:
        ''' round the resource to self.max_resource if close to it
        '''
        if resource * self.resource_multiple_factor > self.max_resource:
            return self.max_resource
        return resource

    def rand_vector_gaussian(self, dim, std=1.0):
        vec = self._random.normal(0, std, dim)
        return vec
    def complete_config(self, partial_config: Dict,
                        lower: Optional[Dict] = None,
                        upper: Optional[Dict] = None) -> Dict:
        ''' generate a complete config from the partial config input
        add minimal resource to config if available
        '''
        if self._reset_times:  # not the first time, use random gaussian
            normalized = self.normalize(partial_config)
            for key in normalized:
                # don't change unordered cat choice
                if key not in self._unordered_cat_hp:
                    if upper and lower:
                        u, l = upper[key], lower[key]
                        gauss_std = u - l
                        # allowed bound
                        u += self.STEPSIZE
                        l -= self.STEPSIZE
                    elif key in self._bounded_keys:
                        u, l, gauss_std = 1, 0, 1.0
                    else: u, l, gauss_std = np.Inf, -np.Inf, 1.0
                    if key in self._bounded_keys:
                        u = min(u, 1)
                        l = max(l, 0)
                    delta = self.rand_vector_gaussian(1, gauss_std)[0]
                    normalized[key] = max(l, min(u, normalized[key] + delta))
            # use best config for unordered cat choice
            config = self.denormalize(normalized)
        else:
            config = partial_config.copy()

        for key, value in self.space.items():
            if key not in config:
                config[key] = value
        logger.debug(f'before random {config}')
        for _, generated in generate_variants({'config': config}):
            config = generated['config']
            break
        logger.debug(f'after random {config}')

        if self._resource:
            config[self.prune_attr] = self.min_resource
        self._reset_times += 1
        return config

    def create(self, init_config: Dict, obj: float, cost: float) -> Searcher:
        flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost,
                      self.space, self.prune_attr, self.min_resource,
                      self.max_resource, self.resource_multiple_factor,
                      self._seed + 1)
        flow2.best_obj = obj * self.metric_op  # minimize internally
        flow2.cost_incumbent = cost
        return flow2
    def normalize(self, config) -> Dict:
        ''' normalize each dimension in config to [0,1]
        '''
        config_norm = {}
        for key, value in config.items():
            if key in self.space:
                # domain: sample.Categorical/Integer/Float/Function
                domain = self.space[key]
                if not callable(getattr(domain, 'get_sampler', None)):
                    config_norm[key] = value
                else:
                    if isinstance(domain, sample.Categorical):
                        # normalize categorical
                        if key in self._ordered_cat_hp:
                            l, d = self._ordered_cat_hp[key]
                            config_norm[key] = d[value] / len(l)
                        elif key in self._ordered_choice_hp:
                            l, d = self._ordered_choice_hp[key]
                            config_norm[key] = d[value] / len(l)
                        elif key in self.incumbent:
                            config_norm[key] = self.incumbent[
                                key] if value == self.best_config[
                                    key] else (self.incumbent[
                                        key] + 1) % self._unordered_cat_hp[key]
                        else: config_norm[key] = 0
                        continue
                    # Uniform/LogUniform/Normal/Base
                    sampler = domain.get_sampler()
                    if isinstance(sampler, sample.Quantized):
                        # sampler is sample.Quantized
                        sampler = sampler.get_sampler()
                    if str(sampler) == 'LogUniform':
                        config_norm[key] = np.log(
                            value / domain.lower) / np.log(
                                domain.upper / domain.lower)
                    elif str(sampler) == 'Uniform':
                        config_norm[key] = (
                            value - domain.lower) / (domain.upper - domain.lower)
                    elif str(sampler) == 'Normal':
                        # N(mean, sd) -> N(0,1)
                        config_norm[key] = (value - sampler.mean) / sampler.sd
                    else:
                        # TODO? elif str(sampler) == 'Base':
                        # sample.Function._CallSampler
                        # e.g., {test: sample_from(
                        #     lambda spec: randn(10, 2).sample() * 0.01)}
                        config_norm[key] = value
                        # print(key + "'s value is not normalized")
            else:  # prune_attr
                config_norm[key] = value
        return config_norm

    def denormalize(self, config):
        ''' denormalize each dimension in config from [0,1]
        '''
        config_denorm = {}
        for key, value in config.items():
            if key in self.space:
                # domain: sample.Categorical/Integer/Float/Function
                domain = self.space[key]
                if not callable(getattr(domain, 'get_sampler', None)):
                    config_denorm[key] = value
                else:
                    if isinstance(domain, sample.Categorical):
                        # denormalize categorical
                        if key in self._ordered_cat_hp:
                            l, _ = self._ordered_cat_hp[key]
                            n = len(l)
                            config_denorm[key] = l[
                                min(n - 1, int(np.floor(value * n)))]
                        elif key in self._ordered_choice_hp:
                            l, _ = self._ordered_choice_hp[key]
                            n = len(l)
                            config_denorm[key] = l[
                                min(n - 1, int(np.floor(value * n)))]
                        else:
                            assert key in self.incumbent
                            if round(value) == self.incumbent[key]:
                                config_denorm[key] = self.best_config[key]
                            else:  # ****random value each time!****
                                config_denorm[key] = self._random.choice(
                                    [x for x in domain.categories
                                     if x != self.best_config[key]])
                        continue
                    # Uniform/LogUniform/Normal/Base
                    sampler = domain.get_sampler()
                    if isinstance(sampler, sample.Quantized):
                        # sampler is sample.Quantized
                        sampler = sampler.get_sampler()
                    # Handle Log/Uniform
                    if str(sampler) == 'LogUniform':
                        config_denorm[key] = (
                            domain.upper / domain.lower) ** value * domain.lower
                    elif str(sampler) == 'Uniform':
                        config_denorm[key] = value * (
                            domain.upper - domain.lower) + domain.lower
                    elif str(sampler) == 'Normal':
                        # denormalization for 'Normal'
                        config_denorm[key] = value * sampler.sd + sampler.mean
                    else:
                        config_denorm[key] = value
                    # Handle quantized
                    sampler = domain.get_sampler()
                    if isinstance(sampler, sample.Quantized):
                        config_denorm[key] = np.round(
                            np.divide(config_denorm[key], sampler.q)) * sampler.q
                    # Handle int (4.6 -> 5)
                    if isinstance(domain, sample.Integer):
                        config_denorm[key] = int(round(config_denorm[key]))
                        # Handle int (4.6 -> 4)
                        # config_denorm[key] = domain.cast(config_denorm[key])
            else:  # prune_attr
                config_denorm[key] = value
        return config_denorm
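A quick round-trip on a log-uniform dimension shows how the [0, 1] encoding behaves (numbers are illustrative):

```python
import numpy as np

lower, upper, value = 1.0, 1024.0, 32.0
z = np.log(value / lower) / np.log(upper / lower)   # normalize -> 0.5
back = (upper / lower) ** z * lower                 # denormalize -> 32.0
print(z, back)
```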
    def set_search_properties(self,
                              metric: Optional[str] = None,
                              mode: Optional[str] = None,
                              config: Optional[Dict] = None) -> bool:
        if metric:
            self._metric = metric
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
            if mode == "max":
                self.metric_op = -1.
            elif mode == "min":
                self.metric_op = 1.
        if config:
            self.space = config
            self._init_search()
        return True

    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
                          error: bool = False):
        ''' compare with incumbent
        '''
        # if better, move, reset num_complete and num_proposed
        # if not better and num_complete >= 2*dim, num_allowed += 2
        self.trial_count += 1
        if not error and result:
            obj = result.get(self._metric)
            if obj:
                obj *= self.metric_op
                if self.best_obj is None or obj < self.best_obj:
                    self.best_obj, self.best_config = obj, self._configs[
                        trial_id]
                    self.incumbent = self.normalize(self.best_config)
                    self.cost_incumbent = result.get(self.cost_attr)
                    if self._resource:
                        self._resource = self.best_config[self.prune_attr]
                    self._num_complete4incumbent = 0
                    self._cost_complete4incumbent = 0
                    self._num_allowed4incumbent = 2 * self.dim
                    self._proposed_by.clear()
                    if self._K > 0:
                        self.step *= np.sqrt(self._K / self._oldK)
                    if self.step > self.step_ub: self.step = self.step_ub
                    self._iter_best_config = self.trial_count
                    return
        proposed_by = self._proposed_by.get(trial_id)
        if proposed_by == self.incumbent:
            # proposed by current incumbent and no better
            self._num_complete4incumbent += 1
            cost = result.get(self.cost_attr)
            if cost: self._cost_complete4incumbent += cost
            if self._num_complete4incumbent >= 2 * self.dim and \
                    self._num_allowed4incumbent == 0:
                self._num_allowed4incumbent = 2
            if self._num_complete4incumbent == self.dir and (
                    not self._resource
                    or self._resource == self.max_resource):
                # check stuck condition if using max resource
                if self.step >= self.step_lower_bound:
                    # decrease step size
                    self._oldK = self._K if self._K else self._iter_best_config
                    self._K = self.trial_count + 1
                    self.step *= np.sqrt(self._oldK / self._K)
                    # logger.info(f"step={self.step}, lb={self.step_lower_bound}")
                self._num_complete4incumbent -= 2
                if self._num_allowed4incumbent < 2:
                    self._num_allowed4incumbent = 2
        # elif proposed_by:  # proposed by older incumbent
        #     del self._proposed_by[trial_id]
    def on_trial_result(self, trial_id: str, result: Dict):
        ''' early update of incumbent
        '''
        if result:
            obj = result.get(self._metric)
            if obj:
                obj *= self.metric_op
                if self.best_obj is None or obj < self.best_obj:
                    self.best_obj = obj
                    config = self._configs[trial_id]
                    if self.best_config != config:
                        self.best_config = config
                        if self._resource:
                            self._resource = config[self.prune_attr]
                        self.incumbent = self.normalize(self.best_config)
                        self.cost_incumbent = result.get(self.cost_attr)
                        self._cost_complete4incumbent = 0
                        self._num_complete4incumbent = 0
                        self._num_allowed4incumbent = 2 * self.dim
                        self._proposed_by.clear()
                        self._iter_best_config = self.trial_count

    def rand_vector_unit_sphere(self, dim) -> np.ndarray:
        vec = self._random.normal(0, 1, dim)
        mag = np.linalg.norm(vec)
        return vec / mag
    def suggest(self, trial_id: str) -> Optional[Dict]:
        ''' suggest a new config, one of the following cases:
        1. same incumbent, increase resource
        2. same resource, move from the incumbent to a random direction
        3. same resource, move from the incumbent to the opposite direction
        '''
        if self._num_complete4incumbent > 0 and self.cost_incumbent and \
                self._resource and self._resource < self.max_resource and (
                    self._cost_complete4incumbent >=
                    self.cost_incumbent * self.resource_multiple_factor):
            # consider increasing resource using sum eval cost of complete
            # configs
            self._resource = self._round(
                self._resource * self.resource_multiple_factor)
            config = self.best_config.copy()
            config[self.prune_attr] = self._resource
            # self.incumbent[self.prune_attr] = self._resource
            self._direction_tried = None
            self._configs[trial_id] = config
            return config
        self._num_allowed4incumbent -= 1
        move = self.incumbent.copy()
        if self._direction_tried is not None:
            # return negative direction
            for i, key in enumerate(self._tunable_keys):
                move[key] -= self._direction_tried[i]
            self._direction_tried = None
        else:
            # propose a new direction
            self._direction_tried = self.rand_vector_unit_sphere(
                self.dim) * self.step
            for i, key in enumerate(self._tunable_keys):
                move[key] += self._direction_tried[i]
        self._project(move)
        config = self.denormalize(move)
        self._proposed_by[trial_id] = self.incumbent
        self._configs[trial_id] = config
        return config

    def _project(self, config):
        ''' project normalized config in the feasible region and set prune_attr
        '''
        for key in self._bounded_keys:
            value = config[key]
            config[key] = max(0, min(1, value))
        if self._resource: config[self.prune_attr] = self._resource

    @property
    def can_suggest(self) -> bool:
        ''' can't suggest if 2*dim configs have been proposed for the incumbent
        while fewer are completed
        '''
        return self._num_allowed4incumbent > 0

    def config_signature(self, config) -> tuple:
        ''' return the signature tuple of a config
        '''
        value_list = []
        for key in self._space_keys:
            if key in config:
                value = config[key]
                if key == self.prune_attr:
                    value_list.append(value)
                # else key must be in self.space
                # get rid of list type or constant,
                # e.g., "eval_metric": ["logloss", "error"]
                elif callable(getattr(self.space[key], 'sample', None)):
                    if isinstance(self.space[key], sample.Integer):
                        value_list.append(int(round(value)))
                    else:
                        value_list.append(value)
            else:
                value_list.append(None)
        return tuple(value_list)

    @property
    def converged(self) -> bool:
        ''' return whether the local search has converged
        '''
        if self._num_complete4incumbent < self.dir - 2: return False
        # check stepsize after enough configs are completed
        return self.step < self.step_lower_bound

    def reach(self, other: Searcher) -> bool:
        ''' whether the incumbent can reach the incumbent of other
        '''
        config1, config2 = self.best_config, other.best_config
        incumbent1, incumbent2 = self.incumbent, other.incumbent
        if self._resource and config1[self.prune_attr] > config2[
                self.prune_attr]:
            # resource will not decrease
            return False
        for key in self._unordered_cat_hp:
            # unordered cat choice is hard to reach by chance
            if config1[key] != config2[key]: return False
        delta = np.array([incumbent1[key] - incumbent2[key]
                          for key in self._tunable_keys])
        return np.linalg.norm(delta) <= self.step
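FLOW2 can also be exercised on its own; a hedged sketch (the import path is assumed; `tune.loguniform` and `tune.uniform` are standard Ray Tune sampling APIs):

```python
from ray import tune
from flaml.searcher.flow2 import FLOW2  # assumed module path

space = {'lr': tune.loguniform(1e-4, 1e-1), 'alpha': tune.uniform(0, 1)}
local = FLOW2(init_config={'lr': 1e-4, 'alpha': 0.5},
              metric='loss', mode='min', space=space)
config = local.suggest('trial_0')   # a config one step from the incumbent
local.on_trial_complete('trial_0', {'loss': 0.5, 'time_total_s': 1.0})
```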
@@ -0,0 +1,132 @@
'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Dict, Optional
import numpy as np
try:
    from ray.tune.suggest import Searcher
except ImportError:
    from .suggestion import Searcher
from .flow2 import FLOW2

import logging
logger = logging.getLogger(__name__)


class SearchThread:
    '''Class of global or local search thread
    '''

    cost_attr = 'time_total_s'

    def __init__(self, mode: str = "min",
                 search_alg: Optional[Searcher] = None):
        ''' When search_alg is omitted, use local search FLOW2
        '''
        self._search_alg = search_alg
        self._mode = mode
        self._metric_op = 1 if mode == 'min' else -1
        self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
            getattr(search_alg, 'cost_incumbent', 0)
        self.cost_best2 = 0
        self.obj_best1 = self.obj_best2 = getattr(
            search_alg, 'best_obj', np.inf)  # inherently minimize
        # eci: expected cost for improvement
        self.eci = self.cost_best
        self.priority = self.speed = 0
    def suggest(self, trial_id: str) -> Optional[Dict]:
        ''' use the suggest() of the underlying search algorithm
        '''
        if isinstance(self._search_alg, FLOW2):
            config = self._search_alg.suggest(trial_id)
        else:
            try:
                config = self._search_alg.suggest(trial_id)
            except Exception:
                logger.warning(
                    'The global search method raised an error. '
                    'Ignoring for this iteration.')
                config = None
        return config

    def update_priority(self, eci: Optional[float] = 0):
        # optimistic projection
        self.priority = eci * self.speed - self.obj_best1

    def update_eci(self, metric_target: float,
                   max_speed: Optional[float] = np.inf):
        # calculate eci: expected cost for improvement over metric_target
        best_obj = metric_target * self._metric_op
        if not self.speed: self.speed = max_speed
        self.eci = max(self.cost_total - self.cost_best1,
                       self.cost_best1 - self.cost_best2)
        if self.obj_best1 > best_obj and self.speed > 0:
            self.eci = max(self.eci,
                           2 * (self.obj_best1 - best_obj) / self.speed)

    def _update_speed(self):
        # calculate speed; use 0 for invalid speed temporarily
        if self.obj_best2 > self.obj_best1:
            self.speed = (self.obj_best2 - self.obj_best1) / (
                self.cost_total - self.cost_best2)
        else: self.speed = 0
    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
                          error: bool = False):
        ''' update the statistics of the thread
        '''
        if not self._search_alg: return
        if not hasattr(self._search_alg, '_ot_trials') or (not error and
                trial_id in self._search_alg._ot_trials):
            # optuna doesn't handle error
            self._search_alg.on_trial_complete(trial_id, result, error)
        if result:
            if self.cost_attr in result:
                self.cost_last = result[self.cost_attr]
                self.cost_total += self.cost_last
            # if not isinstance(self._search_alg, FLOW2):
            #     logger.info(f"result.metric{result[self._search_alg.metric]}")
            if self._search_alg.metric in result:
                obj = result[self._search_alg.metric] * self._metric_op
                if obj < self.obj_best1:
                    self.cost_best2 = self.cost_best1
                    self.cost_best1 = self.cost_total
                    self.obj_best2 = obj if np.isinf(
                        self.obj_best1) else self.obj_best1
                    self.obj_best1 = obj
                    self.cost_best = self.cost_last
            self._update_speed()

    def on_trial_result(self, trial_id: str, result: Dict):
        ''' TODO: update the statistics of the thread with partial result?
        '''
        # print('[SearchThread] on trial result')
        if not self._search_alg: return
        if not hasattr(self._search_alg, '_ot_trials') or (
                trial_id in self._search_alg._ot_trials):
            self._search_alg.on_trial_result(trial_id, result)
        if self.cost_attr in result and self.cost_last < result[self.cost_attr]:
            self.cost_last = result[self.cost_attr]
            # self._update_speed()

    @property
    def converged(self) -> bool:
        return self._search_alg.converged

    @property
    def resource(self) -> float:
        return self._search_alg.resource

    def reach(self, thread) -> bool:
        ''' whether the incumbent can reach the incumbent of thread
        '''
        return self._search_alg.reach(thread._search_alg)

    @property
    def can_suggest(self) -> bool:
        ''' whether the thread can suggest new configs
        '''
        return self._search_alg.can_suggest
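To see how threads are ranked, the ECI ("expected cost for improvement") update can be traced with made-up numbers (in `BlendSearch._select_thread` the priority actually uses the pool-wide minimum ECI; the single-thread version is shown for brevity):

```python
obj_best1, obj_best2 = 0.30, 0.35      # current and previous best objectives
cost_best1, cost_best2, cost_total = 8, 5, 10
speed = (obj_best2 - obj_best1) / (cost_total - cost_best2)   # 0.01
metric_target = 0.25                    # global best the thread must beat
eci = max(cost_total - cost_best1, cost_best1 - cost_best2)   # max(2, 3) = 3
if obj_best1 > metric_target and speed > 0:
    eci = max(eci, 2 * (obj_best1 - metric_target) / speed)   # 10.0
priority = eci * speed - obj_best1      # optimistic projection: -0.2
print(eci, priority)
```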
@@ -0,0 +1,661 @@
'''
Copyright 2020 The Ray Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

This source file is adapted here because ray does not fully support Windows.
'''
import copy
import glob
import logging
import os
from typing import Dict, Optional, Union, List, Tuple

try:
    from ray.util.debug import log_once
except ImportError:
    def log_once(key):  # minimal fallback when ray is unavailable
        return True

logger = logging.getLogger(__name__)

UNRESOLVED_SEARCH_SPACE = str(
    "You passed a `{par}` parameter to {cls} that contained unresolved search "
    "space definitions. {cls} should however be instantiated with fully "
    "configured search spaces only. To use Ray Tune's automatic search space "
    "conversion, pass the space definition as part of the `config` argument "
    "to `tune.run()` instead.")

UNDEFINED_SEARCH_SPACE = str(
    "Trying to sample a configuration from {cls}, but no search "
    "space has been defined. Either pass the `{space}` argument when "
    "instantiating the search algorithm, or pass a `config` to "
    "`tune.run()`.")

UNDEFINED_METRIC_MODE = str(
    "Trying to sample a configuration from {cls}, but the `metric` "
    "({metric}) or `mode` ({mode}) parameters have not been set. "
    "Either pass these arguments when instantiating the search algorithm, "
    "or pass them to `tune.run()`.")
class Searcher:
    """Abstract class for wrapping suggesting algorithms.
    Custom algorithms can extend this class easily by overriding the
    `suggest` method to provide generated parameters for the trials.
    Any subclass that implements ``__init__`` must also call the
    constructor of this class: ``super(Subclass, self).__init__(...)``.
    To track suggestions and their corresponding evaluations, the method
    `suggest` will be passed a trial_id, which will be used in
    subsequent notifications.
    Not all implementations support multi objectives.
    Args:
        metric (str or list): The training result objective value attribute.
            If list then list of training result objective value attributes.
        mode (str or list): If string, one of {min, max}. If list then
            list of max and min; determines whether objective is minimizing
            or maximizing the metric attribute. Must match type of metric.
    .. code-block:: python
        class ExampleSearch(Searcher):
            def __init__(self, metric="mean_loss", mode="min", **kwargs):
                super(ExampleSearch, self).__init__(
                    metric=metric, mode=mode, **kwargs)
                self.optimizer = Optimizer()
                self.configurations = {}
            def suggest(self, trial_id):
                configuration = self.optimizer.query()
                self.configurations[trial_id] = configuration
            def on_trial_complete(self, trial_id, result, **kwargs):
                configuration = self.configurations[trial_id]
                if result and self.metric in result:
                    self.optimizer.update(configuration, result[self.metric])
        tune.run(trainable_function, search_alg=ExampleSearch())
    """
    FINISHED = "FINISHED"
    CKPT_FILE_TMPL = "searcher-state-{}.pkl"

    def __init__(self,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 max_concurrent: Optional[int] = None,
                 use_early_stopped_trials: Optional[bool] = None):
        if use_early_stopped_trials is False:
            raise DeprecationWarning(
                "Early stopped trials are now always used. If this is a "
                "problem, file an issue: https://github.com/ray-project/ray.")
        if max_concurrent is not None:
            logger.warning(
                "DeprecationWarning: `max_concurrent` is deprecated for this "
                "search algorithm. Use tune.suggest.ConcurrencyLimiter() "
                "instead. This will raise an error in future versions of Ray.")

        self._metric = metric
        self._mode = mode

        if not mode or not metric:
            # Early return to avoid assertions
            return

        assert isinstance(
            metric, type(mode)), "metric and mode must be of the same type"
        if isinstance(mode, str):
            assert mode in ["min", "max"
                            ], "if `mode` is a str must be 'min' or 'max'!"
        elif isinstance(mode, list):
            assert len(mode) == len(
                metric), "Metric and mode must be the same length"
            assert all(mod in ["min", "max", "obs"] for mod in
                       mode), "All of mode must be 'min' or 'max' or 'obs'!"
        else:
            raise ValueError("Mode must either be a list or string")
    def set_search_properties(self, metric: Optional[str],
                              mode: Optional[str], config: Dict) -> bool:
        """Pass search properties to searcher.
        This method acts as an alternative to instantiating search algorithms
        with their own specific search spaces. Instead they can accept a
        Tune config through this method. A searcher should return ``True``
        if setting the config was successful, or ``False`` if it was
        unsuccessful, e.g. when the search space has already been set.
        Args:
            metric (str): Metric to optimize
            mode (str): One of ["min", "max"]. Direction to optimize.
            config (dict): Tune config dict.
        """
        return False

    def on_trial_result(self, trial_id: str, result: Dict):
        """Optional notification for result during training.
        Note that by default, the result dict may include NaNs or
        may not include the optimization metric. It is up to the
        subclass implementation to preprocess the result to
        avoid breaking the optimization process.
        Args:
            trial_id (str): A unique string ID for the trial.
            result (dict): Dictionary of metrics for current training
                progress. Note that the result dict may include NaNs or
                may not include the optimization metric. It is up to the
                subclass implementation to preprocess the result to
                avoid breaking the optimization process.
        """
        pass

    def on_trial_complete(self,
                          trial_id: str,
                          result: Optional[Dict] = None,
                          error: bool = False):
        """Notification for the completion of trial.
        Typically, this method is used for notifying the underlying
        optimizer of the result.
        Args:
            trial_id (str): A unique string ID for the trial.
            result (dict): Dictionary of metrics for current training
                progress. Note that the result dict may include NaNs or
                may not include the optimization metric. It is up to the
                subclass implementation to preprocess the result to
                avoid breaking the optimization process. Upon errors, this
                may also be None.
            error (bool): True if the training process raised an error.
        """
        raise NotImplementedError

    def suggest(self, trial_id: str) -> Optional[Dict]:
        """Queries the algorithm to retrieve the next set of parameters.
        Arguments:
            trial_id (str): Trial ID used for subsequent notifications.
        Returns:
            dict | FINISHED | None: Configuration for a trial, if possible.
                If FINISHED is returned, Tune will be notified that
                no more suggestions/configurations will be provided.
                If None is returned, Tune will skip the querying of the
                searcher for this step.
        """
        raise NotImplementedError
    def save(self, checkpoint_path: str):
        """Save state to path for this search algorithm.
        Args:
            checkpoint_path (str): File where the search algorithm
                state is saved. This path should be used later when
                restoring from file.
        Example:
        .. code-block:: python
            search_alg = Searcher(...)
            analysis = tune.run(
                cost,
                num_samples=5,
                search_alg=search_alg,
                name=self.experiment_name,
                local_dir=self.tmpdir)
            search_alg.save("./my_favorite_path.pkl")
        .. versionchanged:: 0.8.7
            Save is automatically called by `tune.run`. You can use
            `restore_from_dir` to restore from an experiment directory
            such as `~/ray_results/trainable`.
        """
        raise NotImplementedError

    def restore(self, checkpoint_path: str):
        """Restore state for this search algorithm.
        Args:
            checkpoint_path (str): File where the search algorithm
                state is saved. This path should be the same
                as the one provided to "save".
        Example:
        .. code-block:: python
            search_alg.save("./my_favorite_path.pkl")
            search_alg2 = Searcher(...)
            search_alg2 = ConcurrencyLimiter(search_alg2, 1)
            search_alg2.restore(checkpoint_path)
            tune.run(cost, num_samples=5, search_alg=search_alg2)
        """
        raise NotImplementedError
def get_state(self) -> Dict:
|
||||
raise NotImplementedError
|
||||
|
||||
def set_state(self, state: Dict):
|
||||
raise NotImplementedError
|
||||
|
||||
def save_to_dir(self, checkpoint_dir: str, session_str: str = "default"):
|
||||
"""Automatically saves the given searcher to the checkpoint_dir.
|
||||
This is automatically used by tune.run during a Tune job.
|
||||
Args:
|
||||
checkpoint_dir (str): Filepath to experiment dir.
|
||||
session_str (str): Unique identifier of the current run
|
||||
session.
|
||||
"""
|
||||
tmp_search_ckpt_path = os.path.join(checkpoint_dir,
|
||||
".tmp_searcher_ckpt")
|
||||
success = True
|
||||
try:
|
||||
self.save(tmp_search_ckpt_path)
|
||||
except NotImplementedError:
|
||||
if log_once("suggest:save_to_dir"):
|
||||
logger.warning(
|
||||
"save not implemented for Searcher. Skipping save.")
|
||||
success = False
|
||||
|
||||
if success and os.path.exists(tmp_search_ckpt_path):
|
||||
os.rename(
|
||||
tmp_search_ckpt_path,
|
||||
os.path.join(checkpoint_dir,
|
||||
self.CKPT_FILE_TMPL.format(session_str)))
|
||||
|
||||
def restore_from_dir(self, checkpoint_dir: str):
|
||||
"""Restores the state of a searcher from a given checkpoint_dir.
|
||||
Typically, you should use this function to restore from an
|
||||
experiment directory such as `~/ray_results/trainable`.
|
||||
.. code-block:: python
|
||||
experiment_1 = tune.run(
|
||||
cost,
|
||||
num_samples=5,
|
||||
search_alg=search_alg,
|
||||
verbose=0,
|
||||
name=self.experiment_name,
|
||||
local_dir="~/my_results")
|
||||
search_alg2 = Searcher()
|
||||
search_alg2.restore_from_dir(
|
||||
os.path.join("~/my_results", self.experiment_name)
|
||||
"""
|
||||
|
||||
pattern = self.CKPT_FILE_TMPL.format("*")
|
||||
full_paths = glob.glob(os.path.join(checkpoint_dir, pattern))
|
||||
if not full_paths:
|
||||
raise RuntimeError(
|
||||
"Searcher unable to find checkpoint in {}".format(
|
||||
checkpoint_dir)) # TODO
|
||||
most_recent_checkpoint = max(full_paths)
|
||||
self.restore(most_recent_checkpoint)
|
||||
|
||||
@property
|
||||
def metric(self) -> str:
|
||||
"""The training result objective value attribute."""
|
||||
return self._metric
|
||||
|
||||
@property
|
||||
def mode(self) -> str:
|
||||
"""Specifies if minimizing or maximizing the metric."""
|
||||
return self._mode
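
    # Editorial sketch (not part of the original file): a minimal Searcher
    # subclass illustrating the interface above. The space format
    # (name -> (low, high)) is a hypothetical simplification; a model-based
    # searcher would also use `on_trial_complete` to update its state.
    #
    #     class RandomSearcher(Searcher):
    #         def __init__(self, space, metric=None, mode="min"):
    #             super().__init__(metric=metric, mode=mode)
    #             self._space = space
    #
    #         def suggest(self, trial_id):
    #             import random
    #             return {name: random.uniform(low, high)
    #                     for name, (low, high) in self._space.items()}
    #
    #         def on_trial_complete(self, trial_id, result=None, error=False):
    #             pass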


class ConcurrencyLimiter(Searcher):
    """A wrapper algorithm for limiting the number of concurrent trials.

    Args:
        searcher (Searcher): Searcher object that the
            ConcurrencyLimiter will manage.
        max_concurrent (int): Maximum concurrent samples from the underlying
            searcher.
        batch (bool): Whether to wait for all concurrent samples
            to finish before updating the underlying searcher.

    Example:

    .. code-block:: python

        from ray.tune.suggest import ConcurrencyLimiter
        search_alg = HyperOptSearch(metric="accuracy")
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)
        tune.run(trainable, search_alg=search_alg)
    """

    def __init__(self,
                 searcher: Searcher,
                 max_concurrent: int,
                 batch: bool = False):
        assert type(max_concurrent) is int and max_concurrent > 0
        self.searcher = searcher
        self.max_concurrent = max_concurrent
        self.batch = batch
        self.live_trials = set()
        self.cached_results = {}
        super(ConcurrencyLimiter, self).__init__(
            metric=self.searcher.metric, mode=self.searcher.mode)

    def suggest(self, trial_id: str) -> Optional[Dict]:
        assert trial_id not in self.live_trials, (
            f"Trial ID {trial_id} must be unique: already found in set.")
        if len(self.live_trials) >= self.max_concurrent:
            logger.debug(
                f"Not providing a suggestion for {trial_id} due to "
                "concurrency limit: %s/%s.", len(self.live_trials),
                self.max_concurrent)
            return

        suggestion = self.searcher.suggest(trial_id)
        if suggestion not in (None, Searcher.FINISHED):
            self.live_trials.add(trial_id)
        return suggestion

    def on_trial_complete(self,
                          trial_id: str,
                          result: Optional[Dict] = None,
                          error: bool = False):
        if trial_id not in self.live_trials:
            return
        elif self.batch:
            self.cached_results[trial_id] = (result, error)
            if len(self.cached_results) == self.max_concurrent:
                # Update the underlying searcher once the
                # full batch is completed.
                for trial_id, (result, error) in self.cached_results.items():
                    self.searcher.on_trial_complete(
                        trial_id, result=result, error=error)
                    self.live_trials.remove(trial_id)
                self.cached_results = {}
            else:
                return
        else:
            self.searcher.on_trial_complete(
                trial_id, result=result, error=error)
            self.live_trials.remove(trial_id)

    def get_state(self) -> Dict:
        state = self.__dict__.copy()
        del state["searcher"]
        return copy.deepcopy(state)

    def set_state(self, state: Dict):
        self.__dict__.update(state)

    def save(self, checkpoint_path: str):
        self.searcher.save(checkpoint_path)

    def restore(self, checkpoint_path: str):
        self.searcher.restore(checkpoint_path)

    def on_pause(self, trial_id: str):
        self.searcher.on_pause(trial_id)

    def on_unpause(self, trial_id: str):
        self.searcher.on_unpause(trial_id)

    def set_search_properties(self, metric: Optional[str], mode: Optional[str],
                              config: Dict) -> bool:
        return self.searcher.set_search_properties(metric, mode, config)


import pickle

from .variant_generator import parse_spec_vars
from ..tune.sample import Categorical, Domain, Float, Integer, LogUniform, \
    Quantized, Uniform
from ..tune.trial import flatten_dict, unflatten_dict

try:
    import optuna as ot
    from optuna.samplers import BaseSampler
except ImportError:
    ot = None
    BaseSampler = None


class _Param:
    def __getattr__(self, item):
        def _inner(*args, **kwargs):
            return (item, args, kwargs)

        return _inner


param = _Param()
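
# Editorial note: attribute access on `param` is deferred -- calling
# `param.suggest_uniform("a", 6, 8)` does not touch Optuna; it simply
# records the call so that `OptunaSearch.suggest` can replay it later on a
# concrete Optuna trial:
#
#     param.suggest_uniform("a", 6, 8)
#     # -> ("suggest_uniform", ("a", 6, 8), {})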


# (Optional) Default (anonymous) metric when using tune.report(x)
DEFAULT_METRIC = "_metric"

# (Auto-filled) The index of this training iteration.
TRAINING_ITERATION = "training_iteration"

class OptunaSearch(Searcher):
    """A wrapper around Optuna to provide trial suggestions.

    `Optuna <https://optuna.org/>`_ is a hyperparameter optimization library.
    In contrast to other libraries, it employs define-by-run style
    hyperparameter definitions.

    This Searcher is a thin wrapper around Optuna's search algorithms.
    You can pass any Optuna sampler, which will be used to generate
    hyperparameter suggestions.

    Please note that this wrapper does not support define-by-run, so the
    search space will be configured before running the optimization. You will
    also need to use a Tune trainable (e.g. using the function API) with
    this wrapper.

    For defining the search space, use ``ray.tune.suggest.optuna.param``
    (see example).

    Args:
        space (list): Hyperparameter search space definition for Optuna's
            sampler. This is a list, and samples for the parameters will
            be obtained in order.
        metric (str): The training result objective value attribute. If None
            but a mode was passed, the anonymous metric `_metric` will be used
            per default.
        mode (str): One of {min, max}. Determines whether objective is
            minimizing or maximizing the metric attribute.
        points_to_evaluate (list): Initial parameter suggestions to be run
            first. This is for when you already have some good parameters
            you want to run first to help the algorithm make better suggestions
            for future parameters. Needs to be a list of dicts containing the
            configurations.
        sampler (optuna.samplers.BaseSampler): Optuna sampler used to
            draw hyperparameter configurations. Defaults to ``TPESampler``.

    Tune automatically converts search spaces to Optuna's format:

    .. code-block:: python

        from ray.tune.suggest.optuna import OptunaSearch

        config = {
            "a": tune.uniform(6, 8),
            "b": tune.uniform(10, 20)
        }

        optuna_search = OptunaSearch(
            metric="loss",
            mode="min")

        tune.run(trainable, config=config, search_alg=optuna_search)

    If you would like to pass the search space manually, the code would
    look like this:

    .. code-block:: python

        from ray.tune.suggest.optuna import OptunaSearch, param

        space = [
            param.suggest_uniform("a", 6, 8),
            param.suggest_uniform("b", 10, 20)
        ]

        algo = OptunaSearch(
            space,
            metric="loss",
            mode="min")

        tune.run(trainable, search_alg=algo)

    .. versionadded:: 0.8.8
    """

    def __init__(self,
                 space: Optional[Union[Dict, List[Tuple]]] = None,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 points_to_evaluate: Optional[List[Dict]] = None,
                 sampler: Optional[BaseSampler] = None):
        assert ot is not None, (
            "Optuna must be installed! Run `pip install optuna`.")
        super(OptunaSearch, self).__init__(
            metric=metric,
            mode=mode,
            max_concurrent=None,
            use_early_stopped_trials=None)

        if isinstance(space, dict) and space:
            resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
            if domain_vars or grid_vars:
                logger.warning(
                    UNRESOLVED_SEARCH_SPACE.format(
                        par="space", cls=type(self)))
                space = self.convert_search_space(space)

        self._space = space

        self._points_to_evaluate = points_to_evaluate

        self._study_name = "optuna"  # Fixed study name for in-memory storage
        self._sampler = sampler or ot.samplers.TPESampler()
        assert isinstance(self._sampler, BaseSampler), \
            "You can only pass an instance of `optuna.samplers.BaseSampler` " \
            "as a sampler to `OptunaSearcher`."

        self._pruner = ot.pruners.NopPruner()
        self._storage = ot.storages.InMemoryStorage()

        self._ot_trials = {}
        self._ot_study = None
        if self._space:
            self._setup_study(mode)

    def _setup_study(self, mode: str):
        if self._metric is None and self._mode:
            # If only a mode was passed, use anonymous metric
            self._metric = DEFAULT_METRIC

        self._ot_study = ot.study.create_study(
            storage=self._storage,
            sampler=self._sampler,
            pruner=self._pruner,
            study_name=self._study_name,
            direction="minimize" if mode == "min" else "maximize",
            load_if_exists=True)

    def set_search_properties(self, metric: Optional[str], mode: Optional[str],
                              config: Dict) -> bool:
        if self._space:
            return False
        space = self.convert_search_space(config)
        self._space = space
        if metric:
            self._metric = metric
        if mode:
            self._mode = mode

        self._setup_study(mode)
        return True

    def suggest(self, trial_id: str) -> Optional[Dict]:
        if not self._space:
            raise RuntimeError(
                UNDEFINED_SEARCH_SPACE.format(
                    cls=self.__class__.__name__, space="space"))
        if not self._metric or not self._mode:
            raise RuntimeError(
                UNDEFINED_METRIC_MODE.format(
                    cls=self.__class__.__name__,
                    metric=self._metric,
                    mode=self._mode))

        if trial_id not in self._ot_trials:
            ot_trial_id = self._storage.create_new_trial(
                self._ot_study._study_id)
            self._ot_trials[trial_id] = ot.trial.Trial(self._ot_study,
                                                       ot_trial_id)
        ot_trial = self._ot_trials[trial_id]

        if self._points_to_evaluate:
            params = self._points_to_evaluate.pop(0)
        else:
            # getattr will fetch the trial.suggest_ function on Optuna trials
            params = {
                args[0] if len(args) > 0 else kwargs["name"]: getattr(
                    ot_trial, fn)(*args, **kwargs)
                for (fn, args, kwargs) in self._space
            }
        return unflatten_dict(params)

    def on_trial_result(self, trial_id: str, result: Dict):
        metric = result[self.metric]
        step = result[TRAINING_ITERATION]
        ot_trial = self._ot_trials[trial_id]
        ot_trial.report(metric, step)

    def on_trial_complete(self,
                          trial_id: str,
                          result: Optional[Dict] = None,
                          error: bool = False):
        ot_trial = self._ot_trials[trial_id]
        ot_trial_id = ot_trial._trial_id
        self._storage.set_trial_value(ot_trial_id, result.get(
            self.metric, None))
        self._storage.set_trial_state(ot_trial_id,
                                      ot.trial.TrialState.COMPLETE)

    def save(self, checkpoint_path: str):
        save_object = (self._storage, self._pruner, self._sampler,
                       self._ot_trials, self._ot_study,
                       self._points_to_evaluate)
        with open(checkpoint_path, "wb") as output_file:
            pickle.dump(save_object, output_file)

    def restore(self, checkpoint_path: str):
        with open(checkpoint_path, "rb") as input_file:
            save_object = pickle.load(input_file)
        self._storage, self._pruner, self._sampler, \
            self._ot_trials, self._ot_study, \
            self._points_to_evaluate = save_object

    @staticmethod
    def convert_search_space(spec: Dict) -> List[Tuple]:
        resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec)

        if not domain_vars and not grid_vars:
            return []

        if grid_vars:
            raise ValueError(
                "Grid search parameters cannot be automatically converted "
                "to an Optuna search space.")

        # Flatten and resolve again after checking for grid search.
        spec = flatten_dict(spec, prevent_delimiter=True)
        resolved_vars, domain_vars, grid_vars = parse_spec_vars(spec)

        def resolve_value(par: str, domain: Domain) -> Tuple:
            quantize = None

            sampler = domain.get_sampler()
            if isinstance(sampler, Quantized):
                quantize = sampler.q
                sampler = sampler.sampler

            if isinstance(domain, Float):
                if isinstance(sampler, LogUniform):
                    if quantize:
                        logger.warning(
                            "Optuna does not support both quantization and "
                            "sampling from LogUniform. Dropped quantization.")
                    return param.suggest_loguniform(par, domain.lower,
                                                    domain.upper)
                elif isinstance(sampler, Uniform):
                    if quantize:
                        return param.suggest_discrete_uniform(
                            par, domain.lower, domain.upper, quantize)
                    return param.suggest_uniform(par, domain.lower,
                                                 domain.upper)
            elif isinstance(domain, Integer):
                if isinstance(sampler, LogUniform):
                    if quantize:
                        logger.warning(
                            "Optuna does not support both quantization and "
                            "sampling from LogUniform. Dropped quantization.")
                    return param.suggest_int(
                        par, domain.lower, domain.upper, log=True)
                elif isinstance(sampler, Uniform):
                    return param.suggest_int(
                        par, domain.lower, domain.upper, step=quantize or 1)
            elif isinstance(domain, Categorical):
                if isinstance(sampler, Uniform):
                    return param.suggest_categorical(par, domain.categories)

            raise ValueError(
                "Optuna search does not support parameters of type "
                "`{}` with samplers of type `{}`".format(
                    type(domain).__name__,
                    type(domain.sampler).__name__))

        # Parameter name is e.g. "a/b/c" for nested dicts
        values = [
            resolve_value("/".join(path), domain)
            for path, domain in domain_vars
        ]

        return values
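
    # Editorial example: `convert_search_space` emits the deferred-call
    # tuples that `suggest` replays. E.g. a flattened spec containing a
    # log-uniform Float domain named "lr" over [1e-4, 1e-1] would convert
    # to roughly:
    #
    #     [("suggest_loguniform", ("lr", 0.0001, 0.1), {})]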
@@ -0,0 +1,396 @@
'''
Copyright 2020 The Ray Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

This source file is adapted here because ray does not fully support Windows.
'''
import copy
import logging
import random
from collections.abc import Mapping
from typing import Any, Dict, Generator, List, Optional, Tuple

import numpy

from ..tune.sample import Categorical, Domain, Function

logger = logging.getLogger(__name__)


class TuneError(Exception):
    """General error class raised by ray.tune."""
    pass


def generate_variants(
        unresolved_spec: Dict) -> Generator[Tuple[Dict, Dict], None, None]:
    """Generates variants from a spec (dict) with unresolved values.

    There are two types of unresolved values:

    Grid search: These define a grid search over values. For example, the
    following grid search values in a spec will produce six distinct
    variants in combination:

        "activation": grid_search(["relu", "tanh"])
        "learning_rate": grid_search([1e-3, 1e-4, 1e-5])

    Lambda functions: These are evaluated to produce a concrete value, and
    can express dependencies or conditional distributions between values.
    They can also be used to express random search (e.g., by calling
    into the `random` or `np` module).

        "cpu": lambda spec: spec.config.num_workers
        "batch_size": lambda spec: random.uniform(1, 1000)

    Finally, to support defining specs in plain JSON / YAML, grid search
    and lambda functions can also be defined alternatively as follows:

        "activation": {"grid_search": ["relu", "tanh"]}
        "cpu": {"eval": "spec.config.num_workers"}

    Use `format_vars` to format the returned dict of hyperparameters.

    Yields:
        (Dict of resolved variables, Spec object)
    """
    for resolved_vars, spec in _generate_variants(unresolved_spec):
        assert not _unresolved_values(spec)
        yield resolved_vars, spec


def grid_search(values: List) -> Dict[str, List]:
    """Convenience method for specifying grid search over a value.

    Arguments:
        values: An iterable whose parameters will be gridded.
    """
    return {"grid_search": values}


_STANDARD_IMPORTS = {
    "random": random,
    "np": numpy,
}

_MAX_RESOLUTION_PASSES = 20


def resolve_nested_dict(nested_dict: Dict) -> Dict[Tuple, Any]:
    """Flattens a nested dict by joining keys into tuple of paths.

    Can then be passed into `format_vars`.
    """
    res = {}
    for k, v in nested_dict.items():
        if isinstance(v, dict):
            for k_, v_ in resolve_nested_dict(v).items():
                res[(k, ) + k_] = v_
        else:
            res[(k, )] = v
    return res


def format_vars(resolved_vars: Dict) -> str:
    """Formats the resolved variable dict into a single string."""
    out = []
    for path, value in sorted(resolved_vars.items()):
        if path[0] in ["run", "env", "resources_per_trial"]:
            continue  # TrialRunner already has these in the experiment_tag
        pieces = []
        last_string = True
        for k in path[::-1]:
            if isinstance(k, int):
                pieces.append(str(k))
            elif last_string:
                last_string = False
                pieces.append(k)
        pieces.reverse()
        out.append(_clean_value("_".join(pieces)) + "=" + _clean_value(value))
    return ",".join(out)


def flatten_resolved_vars(resolved_vars: Dict) -> Dict:
    """Formats the resolved variable dict into a mapping of (str -> value)."""
    flattened_resolved_vars_dict = {}
    for pieces, value in resolved_vars.items():
        if pieces[0] == "config":
            pieces = pieces[1:]
        pieces = [str(piece) for piece in pieces]
        flattened_resolved_vars_dict["/".join(pieces)] = value
    return flattened_resolved_vars_dict


def _clean_value(value: Any) -> str:
    if isinstance(value, float):
        return "{:.5}".format(value)
    else:
        return str(value).replace("/", "_")


def parse_spec_vars(spec: Dict) -> Tuple[List[Tuple[Tuple, Any]], List[Tuple[
        Tuple, Any]], List[Tuple[Tuple, Any]]]:
    resolved, unresolved = _split_resolved_unresolved_values(spec)
    resolved_vars = list(resolved.items())

    if not unresolved:
        return resolved_vars, [], []

    grid_vars = []
    domain_vars = []
    for path, value in unresolved.items():
        if value.is_grid():
            grid_vars.append((path, value))
        else:
            domain_vars.append((path, value))
    grid_vars.sort()

    return resolved_vars, domain_vars, grid_vars


def count_variants(spec: Dict, presets: Optional[List[Dict]] = None) -> int:
    # Guard against the default: iterating over None would raise TypeError.
    presets = presets or []

    # Helper function: Deep update dictionary
    def deep_update(d, u):
        for k, v in u.items():
            if isinstance(v, Mapping):
                d[k] = deep_update(d.get(k, {}), v)
            else:
                d[k] = v
        return d

    # Count samples for a specific spec
    def spec_samples(spec, num_samples=1):
        _, domain_vars, grid_vars = parse_spec_vars(spec)
        grid_count = 1
        for path, domain in grid_vars:
            grid_count *= len(domain.categories)
        return num_samples * grid_count

    total_samples = 0
    total_num_samples = spec.get("num_samples", 1)
    # For each preset, overwrite the spec and count the samples generated
    # for this preset
    for preset in presets:
        preset_spec = copy.deepcopy(spec)
        deep_update(preset_spec["config"], preset)
        total_samples += spec_samples(preset_spec, 1)
        total_num_samples -= 1

    # Add the remaining samples
    if total_num_samples > 0:
        total_samples += spec_samples(spec, total_num_samples)
    return total_samples
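
# Editorial example: with `num_samples=3` and one grid of two values, the
# count is 3 * 2 = 6; each preset consumes one of the `num_samples` and is
# counted over the preset-modified spec instead.
#
#     spec = {"num_samples": 3, "config": {"a": grid_search([1, 2])}}
#     count_variants(spec)  # -> 6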


def _generate_variants(
        spec: Dict) -> Generator[Tuple[Dict, Dict], None, None]:
    spec = copy.deepcopy(spec)
    _, domain_vars, grid_vars = parse_spec_vars(spec)

    if not domain_vars and not grid_vars:
        yield {}, spec
        return

    grid_search = _grid_search_generator(spec, grid_vars)
    for resolved_spec in grid_search:
        resolved_vars = _resolve_domain_vars(resolved_spec, domain_vars)
        for resolved, spec in _generate_variants(resolved_spec):
            for path, value in grid_vars:
                resolved_vars[path] = _get_value(spec, path)
            for k, v in resolved.items():
                if (k in resolved_vars and v != resolved_vars[k]
                        and _is_resolved(resolved_vars[k])):
                    raise ValueError(
                        "The variable `{}` could not be unambiguously "
                        "resolved to a single value. Consider simplifying "
                        "your configuration.".format(k))
                resolved_vars[k] = v
            yield resolved_vars, spec


def get_preset_variants(spec: Dict, config: Dict):
    """Get variants according to a spec, initialized with a config.

    Variables from the spec are overwritten by the variables in the config.
    Thus, we may end up with fewer sampled parameters.

    This function also checks if values used to overwrite search space
    parameters are valid, and logs a warning if not.
    """
    spec = copy.deepcopy(spec)

    resolved, _, _ = parse_spec_vars(config)

    for path, val in resolved:
        try:
            domain = _get_value(spec["config"], path)
            if isinstance(domain, dict):
                if "grid_search" in domain:
                    domain = Categorical(domain["grid_search"])
                else:
                    # If users want to overwrite an entire subdict,
                    # let them do it.
                    domain = None
        except IndexError as exc:
            raise ValueError(
                f"Pre-set config key `{'/'.join(path)}` does not correspond "
                f"to a valid key in the search space definition. Please add "
                f"this path to the `config` variable passed to `tune.run()`."
            ) from exc

        if domain and not domain.is_valid(val):
            logger.warning(
                f"Pre-set value `{val}` is not within valid values of "
                f"parameter `{'/'.join(path)}`: {domain.domain_str}")
        assign_value(spec["config"], path, val)

    return _generate_variants(spec)


def assign_value(spec: Dict, path: Tuple, value: Any):
    for k in path[:-1]:
        spec = spec[k]
    spec[path[-1]] = value


def _get_value(spec: Dict, path: Tuple) -> Any:
    for k in path:
        spec = spec[k]
    return spec
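
# Editorial example: paths are tuples of keys into the nested spec.
#
#     spec = {"config": {"a": {"b": 1}}}
#     _get_value(spec, ("config", "a", "b"))       # -> 1
#     assign_value(spec, ("config", "a", "b"), 2)  # now spec["config"]["a"]["b"] == 2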


def _resolve_domain_vars(spec: Dict,
                         domain_vars: List[Tuple[Tuple, Domain]]) -> Dict:
    resolved = {}
    error = True
    num_passes = 0
    while error and num_passes < _MAX_RESOLUTION_PASSES:
        num_passes += 1
        error = False
        for path, domain in domain_vars:
            if path in resolved:
                continue
            try:
                value = domain.sample(_UnresolvedAccessGuard(spec))
            except RecursiveDependencyError as e:
                error = e
            except Exception:
                raise ValueError(
                    "Failed to evaluate expression: {}: {}".format(
                        path, domain))
            else:
                assign_value(spec, path, value)
                resolved[path] = value
    if error:
        raise error
    return resolved


def _grid_search_generator(unresolved_spec: Dict,
                           grid_vars: List) -> Generator[Dict, None, None]:
    value_indices = [0] * len(grid_vars)

    def increment(i):
        value_indices[i] += 1
        if value_indices[i] >= len(grid_vars[i][1]):
            value_indices[i] = 0
            if i + 1 < len(value_indices):
                return increment(i + 1)
            else:
                return True
        return False

    if not grid_vars:
        yield unresolved_spec
        return

    while value_indices[-1] < len(grid_vars[-1][1]):
        spec = copy.deepcopy(unresolved_spec)
        for i, (path, values) in enumerate(grid_vars):
            assign_value(spec, path, values[value_indices[i]])
        yield spec
        if grid_vars:
            done = increment(0)
            if done:
                break


def _is_resolved(v) -> bool:
    resolved, _ = _try_resolve(v)
    return resolved


def _try_resolve(v) -> Tuple[bool, Any]:
    if isinstance(v, Domain):
        # Domain to sample from
        return False, v
    elif isinstance(v, dict) and len(v) == 1 and "eval" in v:
        # Lambda function in eval syntax
        return False, Function(
            lambda spec: eval(v["eval"], _STANDARD_IMPORTS, {"spec": spec}))
    elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v:
        # Grid search values
        grid_values = v["grid_search"]
        if not isinstance(grid_values, list):
            raise TuneError(
                "Grid search expected list of values, got: {}".format(
                    grid_values))
        return False, Categorical(grid_values).grid()
    return True, v


def _split_resolved_unresolved_values(
        spec: Dict) -> Tuple[Dict[Tuple, Any], Dict[Tuple, Any]]:
    resolved_vars = {}
    unresolved_vars = {}
    for k, v in spec.items():
        resolved, v = _try_resolve(v)
        if not resolved:
            unresolved_vars[(k, )] = v
        elif isinstance(v, dict):
            # Recurse into a dict
            _resolved_children, _unresolved_children = \
                _split_resolved_unresolved_values(v)
            for (path, value) in _resolved_children.items():
                resolved_vars[(k, ) + path] = value
            for (path, value) in _unresolved_children.items():
                unresolved_vars[(k, ) + path] = value
        elif isinstance(v, list):
            # Recurse into a list
            for i, elem in enumerate(v):
                _resolved_children, _unresolved_children = \
                    _split_resolved_unresolved_values({i: elem})
                for (path, value) in _resolved_children.items():
                    resolved_vars[(k, ) + path] = value
                for (path, value) in _unresolved_children.items():
                    unresolved_vars[(k, ) + path] = value
        else:
            resolved_vars[(k, )] = v
    return resolved_vars, unresolved_vars


def _unresolved_values(spec: Dict) -> Dict[Tuple, Any]:
    return _split_resolved_unresolved_values(spec)[1]


def has_unresolved_values(spec: Dict) -> bool:
    return True if _unresolved_values(spec) else False


class _UnresolvedAccessGuard(dict):
    def __init__(self, *args, **kwds):
        super(_UnresolvedAccessGuard, self).__init__(*args, **kwds)
        self.__dict__ = self

    def __getattribute__(self, item):
        value = dict.__getattribute__(self, item)
        if not _is_resolved(value):
            raise RecursiveDependencyError(
                "`{}` recursively depends on {}".format(item, value))
        elif isinstance(value, dict):
            return _UnresolvedAccessGuard(value)
        else:
            return value


class RecursiveDependencyError(Exception):
    def __init__(self, msg: str):
        Exception.__init__(self, msg)
249  flaml/space.py
@@ -1,249 +0,0 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''


class ConfigSearchInfo:
    '''The class of the search space of a hyperparameter.

    Attributes:
        name: A string of the name of the hyperparameter
        type: data type of the hyperparameter
        lower: A number of the lower bound of the value
        upper: A number of the upper bound of the value
        init: A number of the initial value. For hyperparameters related to
            complexity, the init value needs to correspond to the lowest
            complexity
        change_type: A string of the change type, 'linear' or 'log'
        min_change: A number of the minimal change required. Could be inf if
            no such requirement
    '''

    def __init__(self, name, type, lower, upper, init, change_type='log',
                 complexity_related=True, min_change=None):
        self.name = name
        self.type = type
        self.lower = lower
        self.upper = upper
        self.init = init
        self.change_type = change_type
        self.complexity_related = complexity_related
        # default setting of min_change: if type is int, min_change
        # should be 1, otherwise +inf
        if min_change is None:
            if self.type == int:
                self.min_change = 1.0  # minimum change required
            else:
                self.min_change = float('+inf')
        else:
            self.min_change = min_change


def config_space(estimator, data_size, objective_name="regression"):
    CS = {}
    n_estimators_upper = min(32768, int(data_size))
    max_leaves_upper = min(32768, int(data_size))
    # exp_max_depth_upper = min(32768, data_size)
    if 'xgboost' in estimator:
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        CS['max_leaves'] = ConfigSearchInfo(
            name='max_leaves', type=int, lower=4, init=4,
            upper=max_leaves_upper, change_type='log')
        CS['min_child_weight'] = ConfigSearchInfo(
            name='min_child_weight', type=float, lower=0.001, init=20.0,
            upper=20.0, change_type='log')
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.01, init=0.1,
            upper=1.0, change_type='log')
        CS['subsample'] = ConfigSearchInfo(
            name='subsample', type=float, lower=0.6, init=1.0, upper=1.0,
            change_type='linear')
        CS['reg_alpha'] = ConfigSearchInfo(
            name='reg_alpha', type=float, lower=1e-10, init=1e-10, upper=1.0,
            change_type='log', complexity_related=True)
        CS['reg_lambda'] = ConfigSearchInfo(
            name='reg_lambda', type=float, lower=1e-10, init=1.0, upper=1.0,
            change_type='log')
        CS['colsample_bylevel'] = ConfigSearchInfo(
            name='colsample_bylevel', type=float, lower=0.6, init=1.0,
            upper=1.0, change_type='linear')
        CS['colsample_bytree'] = ConfigSearchInfo(
            name='colsample_bytree', type=float, lower=0.7, init=1.0,
            upper=1.0, change_type='linear')
    elif estimator in ('rf', 'extra_tree'):
        n_estimators_upper = min(2048, n_estimators_upper)
        # max_leaves_upper = min(2048, max_leaves_upper)
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        if objective_name != 'regression':
            CS['criterion'] = ConfigSearchInfo(
                name='criterion', type=int, lower=1, init=1, upper=2,
                change_type='log')
        # CS['max_leaves'] = ConfigSearchInfo(
        #     name='max_leaves', type=int, lower=4, init=4,
        #     upper=max_leaves_upper, change_type='log',
        #     complexity_related=True)
        CS['max_features'] = ConfigSearchInfo(
            name='max_features', type=float, lower=0.1, init=1.0, upper=1.0,
            change_type='log')
        # CS['min_samples_split'] = ConfigSearchInfo(
        #     name='min_samples_split', type=int, lower=2, init=2, upper=20,
        #     change_type='log', complexity_related=True)
        # CS['min_samples_leaf'] = ConfigSearchInfo(
        #     name='min_samples_leaf', type=int, lower=1, init=1, upper=20,
        #     change_type='log', complexity_related=True)
    elif 'lgbm' in estimator:
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        CS['max_leaves'] = ConfigSearchInfo(
            name='max_leaves', type=int, lower=4, init=4,
            upper=max_leaves_upper, change_type='log')
        CS['min_child_weight'] = ConfigSearchInfo(
            name='min_child_weight', type=float, lower=0.001, init=20,
            upper=20.0, change_type='log')
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.01, init=0.1,
            upper=1.0, change_type='log')
        CS['subsample'] = ConfigSearchInfo(
            name='subsample', type=float, lower=0.6, init=1.0, upper=1.0,
            change_type='log', complexity_related=True)
        CS['log_max_bin'] = ConfigSearchInfo(
            name='log_max_bin', type=int, lower=3, init=8, upper=10,
            change_type='log', complexity_related=True)
        CS['reg_alpha'] = ConfigSearchInfo(
            name='reg_alpha', type=float, lower=1e-10, init=1e-10, upper=1.0,
            change_type='log', complexity_related=True)
        CS['reg_lambda'] = ConfigSearchInfo(
            name='reg_lambda', type=float, lower=1e-10, init=1.0, upper=1.0,
            change_type='log')
        CS['colsample_bytree'] = ConfigSearchInfo(
            name='colsample_bytree', type=float, lower=0.7, init=1.0,
            upper=1.0, change_type='log')
    elif 'lr' in estimator:
        CS['C'] = ConfigSearchInfo(
            name='C', type=float, lower=0.03125, init=1.0, upper=32768.0,
            change_type='log', complexity_related=True)
    elif 'catboost' in estimator:
        # CS['n_estimators'] = ConfigSearchInfo(
        #     name='n_estimators', type=int, lower=4, init=64,
        #     upper=n_estimators_upper, change_type='log',
        #     complexity_related=True)
        early_stopping_rounds = max(min(round(1500000 / data_size), 150), 10)
        CS['rounds'] = ConfigSearchInfo(
            name='rounds', type=int, lower=10, init=10,
            upper=early_stopping_rounds, change_type='log')
        # CS['exp_max_depth'] = ConfigSearchInfo(
        #     name='exp_max_depth', type=int, lower=32, init=64, upper=256,
        #     change_type='log', complexity_related=True)
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.005, init=0.1,
            upper=0.2, change_type='log')
        # CS['l2_leaf_reg'] = ConfigSearchInfo(
        #     name='l2_leaf_reg', type=float, lower=1, init=3, upper=5,
        #     change_type='log')
    elif 'nn' == estimator:
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=1e-4, init=3e-4,
            upper=3e-2, change_type='log')
        CS['weight_decay'] = ConfigSearchInfo(
            name='weight_decay', type=float, lower=1e-12, init=1e-6,
            upper=0.1, change_type='log')
        CS['dropout_prob'] = ConfigSearchInfo(
            name='dropout_prob', type=float, lower=1.0, init=1.1,
            upper=1.5, change_type='log')
    elif 'kneighbor' in estimator:
        n_neighbors_upper = min(512, int(data_size / 2))
        CS['n_neighbors'] = ConfigSearchInfo(
            name='n_neighbors', type=int, lower=1, init=5,
            upper=n_neighbors_upper, change_type='log')
    else:
        raise NotImplementedError

    return CS


def estimator_size(config, estimator):
    if estimator in ['xgboost', 'lgbm', 'rf', 'extra_tree']:
        try:
            max_leaves = int(round(config['max_leaves']))
            n_estimators = int(round(config['n_estimators']))
            model_size = float((max_leaves * 3 + (max_leaves - 1) * 4 + 1)
                               * n_estimators * 8)
        except KeyError:
            model_size = 0
        return model_size
    elif 'catboost' in estimator:
        n_estimators = int(round(config.get('n_estimators', 8192)))
        max_leaves = int(round(config.get('exp_max_depth', 64)))
        model_size = float((max_leaves * 3 + (max_leaves - 1) * 4 + 1)
                           * n_estimators * 8)
        return model_size
    else:
        model_size = 1.0
        return model_size


def generate_config_ini(estimator, estimator_configspace):
    config_dic = {}
    config_dic_more = {}
    config_type_dic = {}
    for _, config in estimator_configspace.items():
        name, init = config.name, config.init
        type_, complexity_related = config.type, config.complexity_related
        config_type_dic[name] = type_
        if complexity_related:
            config_dic[name] = init
        else:
            config_dic_more[name] = init
    return config_dic, config_dic_more, {**config_dic, **config_dic_more}, \
        config_type_dic


def generate_config_min(estimator, estimator_configspace, max_config_size):
    config_dic = {}
    config_dic_more = {}
    for _, config in estimator_configspace.items():
        name, lower = config.name, config.lower
        complexity_related = config.complexity_related
        if complexity_related:
            config_dic[name] = lower
        else:
            config_dic_more[name] = lower

    return config_dic, config_dic_more, {**config_dic, **config_dic_more}


def generate_config_max(estimator, estimator_configspace, max_config_size):
    config_dic = {}
    config_dic_more = {}
    for _, config in estimator_configspace.items():
        name, upper = config.name, config.upper
        complexity_related = config.complexity_related
        if complexity_related:
            if name in ('n_estimators', 'max_leaves'):
                config_dic[name] = min(upper, max_config_size)
            else:
                config_dic[name] = upper
        else:
            config_dic_more[name] = upper
    return config_dic, config_dic_more, {**config_dic, **config_dic_more}


def get_config_values(config_dic, config_type_dic):
    value_list = []
    for k in config_dic.keys():
        org_v = config_dic[k]
        if config_type_dic[k] == int:
            v = int(round(org_v))
            value_list.append(v)
        else:
            value_list.append(org_v)
    return value_list

@@ -1,5 +1,5 @@
 '''!
- * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
+ * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
@@ -0,0 +1,181 @@
# Economical Hyperparameter Optimization

`flaml.tune` is a module for economical hyperparameter tuning. It frees users from manually tuning many hyperparameters for software such as machine learning training procedures.
The API is compatible with ray tune.

Example:

```python
from flaml import tune
import time

def evaluate_config(config):
    '''evaluate a hyperparameter configuration'''
    # we use a toy example with 2 hyperparameters
    metric = (round(config['x'])-85000)**2 - config['x']/config['y']
    # usually the evaluation incurs a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

analysis = tune.run(
    evaluate_config,  # the function to evaluate a config
    config={
        'x': tune.qloguniform(lower=1, upper=100000, q=1),
        'y': tune.randint(lower=1, upper=100000)
    },  # the search space
    init_config={'x': 1},  # an initial (partial) config with low cost
    metric='metric',  # the name of the metric used for optimization
    mode='min',  # the optimization mode, 'min' or 'max'
    num_samples=-1,  # the maximal number of configs to try, -1 means infinite
    time_budget_s=60,  # the time budget in seconds
    local_dir='logs/',  # the local directory to store logs
    # verbose=0,  # verbosity
    # use_ray=True,  # uncomment when performing parallel tuning using ray
)

print(analysis.best_trial.last_result)  # the best trial's result
print(analysis.best_config)  # the best config
```

Or, using ray tune's API:

```python
from ray import tune as raytune
from flaml import tune  # provides the search space functions and tune.report
from flaml import CFO, BlendSearch
import time

def evaluate_config(config):
    '''evaluate a hyperparameter configuration'''
    # we use a toy example with 2 hyperparameters
    metric = (round(config['x'])-85000)**2 - config['x']/config['y']
    # usually the evaluation incurs a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

analysis = raytune.run(
    evaluate_config,  # the function to evaluate a config
    config={
        'x': tune.qloguniform(lower=1, upper=100000, q=1),
        'y': tune.randint(lower=1, upper=100000)
    },  # the search space
    metric='metric',  # the name of the metric used for optimization
    mode='min',  # the optimization mode, 'min' or 'max'
    num_samples=-1,  # the maximal number of configs to try, -1 means infinite
    time_budget_s=60,  # the time budget in seconds
    local_dir='logs/',  # the local directory to store logs
    search_alg=CFO(points_to_evaluate=[{'x': 1}])  # or BlendSearch
    # other algo example: raytune.create_searcher('optuna'),
)

print(analysis.best_trial.last_result)  # the best trial's result
print(analysis.best_config)  # the best config
```

For more examples, please check out
[notebooks](https://github.com/microsoft/FLAML/tree/main/notebook/).


`flaml` offers two HPO methods: CFO and BlendSearch.
`flaml.tune` uses BlendSearch by default.

## CFO: Frugal Optimization for Cost-related Hyperparameters

<p align="center">
    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/CFO.png" width=200>
    <br>
</p>

CFO uses the randomized direct search method FLOW<sup>2</sup> with adaptive stepsize and random restart.
It requires a low-cost initial point as input if such a point exists.
The search begins with the low-cost initial point and gradually moves to the
high-cost region if needed. The local search method has a provable convergence
rate and bounded cost.

About FLOW<sup>2</sup>: FLOW<sup>2</sup> is a simple yet effective randomized direct search method.
It is an iterative optimization method that can optimize black-box functions.
FLOW<sup>2</sup> only requires pairwise comparisons between function values to perform its iterative update. Compared to existing HPO methods, FLOW<sup>2</sup> has the following appealing properties:
1. It is applicable to general black-box functions with a good convergence rate in terms of loss.
2. It provides theoretical guarantees on the total evaluation cost incurred.

The GIFs below demonstrate an example search trajectory of FLOW<sup>2</sup> shown in the loss and evaluation cost (i.e., the training time) space respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing a good convergence property, and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.

<p align="center">
<img align="center" src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center" src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_cost_cfo_12s.gif" width=360>
<br>
<figcaption>Figure 1. FLOW<sup>2</sup> in tuning the # of leaves and the # of trees for XGBoost. The two background heatmaps show the loss and cost distribution of all configurations. The black dots are the points evaluated in FLOW<sup>2</sup>. Black dots connected by lines are points that yield better loss performance when evaluated.</figcaption>
</p>


Example:

```python
from flaml import CFO
tune.run(...
    search_alg=CFO(points_to_evaluate=[init_config]),
)
```

Recommended scenario: there exist cost-related hyperparameters and a low-cost
initial point is known before optimization.
If the search space is complex and CFO gets trapped in local optima, consider
using BlendSearch.

## BlendSearch: Economical Hyperparameter Optimization With Blended Search Strategy

<p align="center">
    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/BlendSearch.png" width=200>
    <br>
</p>

BlendSearch combines local search with global search. It leverages the frugality
of CFO and the space exploration ability of global search methods such as
Bayesian optimization. Like CFO, BlendSearch requires a low-cost initial point
as input if such a point exists, and starts the search from there. Different from
CFO, BlendSearch will not wait for the local search to fully converge before
trying new start points. The new start points are suggested by the global search
method and filtered based on their distance to the existing points in the
cost-related dimensions. BlendSearch still gradually increases the trial cost.
It prioritizes among the global search thread and multiple local search threads
based on optimism in the face of uncertainty.

Example:

```python
from flaml import BlendSearch
tune.run(...
    search_alg=BlendSearch(points_to_evaluate=[init_config]),
)
```

Recommended scenario: cost-related hyperparameters exist, a low-cost
initial point is known, and the search space is complex such that local search
is prone to getting stuck at local optima.

For more technical details, please check our papers.

* [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.

```
@inproceedings{wu2021cfo,
    title={Frugal Optimization for Cost-related Hyperparameters},
    author={Qingyun Wu and Chi Wang and Silu Huang},
    year={2021},
    booktitle={AAAI'21},
}
```

* Economical Hyperparameter Optimization With Blended Search Strategy. Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. To appear in ICLR 2021.

```
@inproceedings{wang2021blendsearch,
    title={Economical Hyperparameter Optimization With Blended Search Strategy},
    author={Chi Wang and Qingyun Wu and Silu Huang and Amin Saied},
    year={2021},
    booktitle={ICLR'21},
}
```
@@ -0,0 +1,7 @@
try:
    from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
                          qrandn, loguniform, qloguniform)
except ImportError:
    from .sample import (uniform, quniform, choice, randint, qrandint, randn,
                         qrandn, loguniform, qloguniform)
from .tune import run, report
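
# Editorial note: since the sampling API above falls back to the local
# implementation only when ray is absent, a search space written for
# `flaml.tune` also works under `ray.tune`, e.g.:
#
#     from flaml import tune
#     space = {"lr": tune.loguniform(1e-4, 1e-1)}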

@@ -0,0 +1,180 @@
'''
Copyright 2020 The Ray Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

This source file is adapted here because ray does not fully support Windows.
'''
from typing import Dict, Optional
import numpy as np
from .trial import Trial

import logging
logger = logging.getLogger(__name__)


def is_nan_or_inf(value):
    return np.isnan(value) or np.isinf(value)


class ExperimentAnalysis:
    """Analyze results from a Tune experiment."""

    @property
    def best_trial(self) -> Trial:
        """Get the best trial of the experiment.

        The best trial is determined by comparing the last trial results
        using the `metric` and `mode` parameters passed to `tune.run()`.

        If you didn't pass these parameters, use
        `get_best_trial(metric, mode, scope)` instead.
        """
        if not self.default_metric or not self.default_mode:
            raise ValueError(
                "To fetch the `best_trial`, pass a `metric` and `mode` "
                "parameter to `tune.run()`. Alternatively, use the "
                "`get_best_trial(metric, mode)` method to set the metric "
                "and mode explicitly.")
        return self.get_best_trial(self.default_metric, self.default_mode)

    @property
    def best_config(self) -> Dict:
        """Get the config of the best trial of the experiment.

        The best trial is determined by comparing the last trial results
        using the `metric` and `mode` parameters passed to `tune.run()`.

        If you didn't pass these parameters, use
        `get_best_config(metric, mode, scope)` instead.
        """
        if not self.default_metric or not self.default_mode:
            raise ValueError(
                "To fetch the `best_config`, pass a `metric` and `mode` "
                "parameter to `tune.run()`. Alternatively, use the "
                "`get_best_config(metric, mode)` method to set the metric "
                "and mode explicitly.")
        return self.get_best_config(self.default_metric, self.default_mode)

    def _validate_metric(self, metric: str) -> str:
        if not metric and not self.default_metric:
            raise ValueError(
                "No `metric` has been passed and `default_metric` has "
                "not been set. Please specify the `metric` parameter.")
        return metric or self.default_metric

    def _validate_mode(self, mode: str) -> str:
        if not mode and not self.default_mode:
            raise ValueError(
                "No `mode` has been passed and `default_mode` has "
                "not been set. Please specify the `mode` parameter.")
        if mode and mode not in ["min", "max"]:
            raise ValueError("If set, `mode` has to be one of [min, max]")
        return mode or self.default_mode

    def get_best_trial(self,
                       metric: Optional[str] = None,
                       mode: Optional[str] = None,
                       scope: str = "last",
                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
        """Retrieve the best trial object.

        Compares all trials' scores on ``metric``.
        If ``metric`` is not specified, ``self.default_metric`` will be used.
        If ``mode`` is not specified, ``self.default_mode`` will be used.
        These values are usually initialized by passing the ``metric`` and
        ``mode`` parameters to ``tune.run()``.

        Args:
            metric (str): Key for trial info to order on. Defaults to
                ``self.default_metric``.
            mode (str): One of [min, max]. Defaults to ``self.default_mode``.
            scope (str): One of [all, last, avg, last-5-avg, last-10-avg].
                If `scope=last`, only look at each trial's final step for
                `metric`, and compare across trials based on `mode=[min,max]`.
                If `scope=avg`, consider the simple average over all steps
                for `metric` and compare across trials based on
                `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`,
                consider the simple average over the last 5 or 10 steps for
                `metric` and compare across trials based on `mode=[min,max]`.
                If `scope=all`, find each trial's min/max score for `metric`
                based on `mode`, and compare trials based on `mode=[min,max]`.
            filter_nan_and_inf (bool): If True (default), NaN or infinite
                values are disregarded and these trials are never selected as
                the best trial.
        """
        metric = self._validate_metric(metric)
        mode = self._validate_mode(mode)

        if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
            raise ValueError(
                "ExperimentAnalysis: attempting to get best trial for "
                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
                "\"last-5-avg\", \"last-10-avg\"]. "
                "If you didn't pass a `metric` parameter to `tune.run()`, "
                "you have to pass one when fetching the best trial.".format(
                    metric, scope))
        best_trial = None
        best_metric_score = None
        for trial in self.trials:
            if metric not in trial.metric_analysis:
                continue

            if scope in ["last", "avg", "last-5-avg", "last-10-avg"]:
                metric_score = trial.metric_analysis[metric][scope]
            else:
                metric_score = trial.metric_analysis[metric][mode]

            if filter_nan_and_inf and is_nan_or_inf(metric_score):
                continue

            if best_metric_score is None:
                best_metric_score = metric_score
                best_trial = trial
                continue

            if (mode == "max") and (best_metric_score < metric_score):
                best_metric_score = metric_score
                best_trial = trial
            elif (mode == "min") and (best_metric_score > metric_score):
                best_metric_score = metric_score
                best_trial = trial

        if not best_trial:
            logger.warning(
                "Could not find best trial. Did you pass the correct `metric` "
                "parameter?")
        return best_trial
|
||||
|
||||
def get_best_config(self,
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None,
|
||||
scope: str = "last") -> Optional[Dict]:
|
||||
"""Retrieve the best config corresponding to the trial.
|
||||
Compares all trials' scores on `metric`.
|
||||
If ``metric`` is not specified, ``self.default_metric`` will be used.
|
||||
If `mode` is not specified, ``self.default_mode`` will be used.
|
||||
These values are usually initialized by passing the ``metric`` and
|
||||
``mode`` parameters to ``tune.run()``.
|
||||
Args:
|
||||
metric (str): Key for trial info to order on. Defaults to
|
||||
``self.default_metric``.
|
||||
mode (str): One of [min, max]. Defaults to ``self.default_mode``.
|
||||
scope (str): One of [all, last, avg, last-5-avg, last-10-avg].
|
||||
If `scope=last`, only look at each trial's final step for
|
||||
`metric`, and compare across trials based on `mode=[min,max]`.
|
||||
If `scope=avg`, consider the simple average over all steps
|
||||
for `metric` and compare across trials based on
|
||||
`mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`,
|
||||
consider the simple average over the last 5 or 10 steps for
|
||||
`metric` and compare across trials based on `mode=[min,max]`.
|
||||
If `scope=all`, find each trial's min/max score for `metric`
|
||||
based on `mode`, and compare trials based on `mode=[min,max]`.
|
||||
"""
|
||||
best_trial = self.get_best_trial(metric, mode, scope)
|
||||
return best_trial.config if best_trial else None
|
|
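For orientation, a minimal hedged sketch of how these accessors are meant to be used; the `analysis` object and the metric name `metric2minimize` are illustrative only (they mirror the `tune.run()` docstring example further below):

```python
# Assumes `analysis` was returned by tune.run(..., metric='metric2minimize',
# mode='min'); both names are taken from the example in tune.py below.
best_trial = analysis.get_best_trial(metric="metric2minimize",
                                     mode="min", scope="last")
if best_trial is not None:
    print(best_trial.last_result)  # the trial's last reported result dict
# Shortcut that returns only the hyperparameters of that trial:
print(analysis.get_best_config(metric="metric2minimize", mode="min"))
```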
@ -0,0 +1,535 @@
'''
Copyright 2020 The Ray Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

This source file is included here because ray does not fully support Windows.
'''
import logging
import random
from copy import copy
from inspect import signature
from math import isclose
from typing import Any, Callable, Dict, List, Optional, Sequence, Union

import numpy as np

logger = logging.getLogger(__name__)


class Domain:
    """Base class to specify a type and valid range to sample parameters from.

    This base class is implemented by parameter spaces, like float ranges
    (``Float``), integer ranges (``Integer``), or categorical variables
    (``Categorical``). The ``Domain`` object contains information about
    valid values (e.g. minimum and maximum values), and exposes methods that
    allow specification of specific samplers (e.g. ``uniform()`` or
    ``loguniform()``).
    """
    sampler = None
    default_sampler_cls = None

    def cast(self, value):
        """Cast value to domain type"""
        return value

    def set_sampler(self, sampler, allow_override=False):
        if self.sampler and not allow_override:
            raise ValueError("You can only choose one sampler for parameter "
                             "domains. Existing sampler for parameter {}: "
                             "{}. Tried to add {}".format(
                                 self.__class__.__name__, self.sampler,
                                 sampler))
        self.sampler = sampler

    def get_sampler(self):
        sampler = self.sampler
        if not sampler:
            sampler = self.default_sampler_cls()
        return sampler

    def sample(self, spec=None, size=1):
        sampler = self.get_sampler()
        return sampler.sample(self, spec=spec, size=size)

    def is_grid(self):
        return isinstance(self.sampler, Grid)

    def is_function(self):
        return False

    def is_valid(self, value: Any):
        """Returns True if `value` is a valid value in this domain."""
        raise NotImplementedError

    @property
    def domain_str(self):
        return "(unknown)"


class Sampler:
    def sample(self,
               domain: Domain,
               spec: Optional[Union[List[Dict], Dict]] = None,
               size: int = 1):
        raise NotImplementedError


class BaseSampler(Sampler):
    def __str__(self):
        return "Base"


class Uniform(Sampler):
    def __str__(self):
        return "Uniform"


class LogUniform(Sampler):
    def __init__(self, base: float = 10):
        self.base = base
        assert self.base > 0, "Base has to be strictly greater than 0"

    def __str__(self):
        return "LogUniform"


class Normal(Sampler):
    def __init__(self, mean: float = 0., sd: float = 0.):
        self.mean = mean
        self.sd = sd

        assert self.sd > 0, "SD has to be strictly greater than 0"

    def __str__(self):
        return "Normal"


class Grid(Sampler):
    """Dummy sampler used for grid search"""

    def sample(self,
               domain: Domain,
               spec: Optional[Union[List[Dict], Dict]] = None,
               size: int = 1):
        raise RuntimeError("Do not call `sample()` on grid.")


class Float(Domain):
    class _Uniform(Uniform):
        def sample(self,
                   domain: "Float",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            assert domain.lower > float("-inf"), \
                "Uniform needs a lower bound"
            assert domain.upper < float("inf"), \
                "Uniform needs an upper bound"
            items = np.random.uniform(domain.lower, domain.upper, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

    class _LogUniform(LogUniform):
        def sample(self,
                   domain: "Float",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            assert domain.lower > 0, \
                "LogUniform needs a lower bound greater than 0"
            assert 0 < domain.upper < float("inf"), \
                "LogUniform needs an upper bound greater than 0"
            logmin = np.log(domain.lower) / np.log(self.base)
            logmax = np.log(domain.upper) / np.log(self.base)

            items = self.base**(np.random.uniform(logmin, logmax, size=size))
            return items if len(items) > 1 else domain.cast(items[0])

    class _Normal(Normal):
        def sample(self,
                   domain: "Float",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            assert not domain.lower or domain.lower == float("-inf"), \
                "Normal sampling does not allow a lower value bound."
            assert not domain.upper or domain.upper == float("inf"), \
                "Normal sampling does not allow an upper value bound."
            items = np.random.normal(self.mean, self.sd, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

    default_sampler_cls = _Uniform

    def __init__(self, lower: Optional[float], upper: Optional[float]):
        # Need to explicitly check for None
        self.lower = lower if lower is not None else float("-inf")
        self.upper = upper if upper is not None else float("inf")

    def cast(self, value):
        return float(value)

    def uniform(self):
        if not self.lower > float("-inf"):
            raise ValueError(
                "Uniform requires a lower bound. Make sure to set the "
                "`lower` parameter of `Float()`.")
        if not self.upper < float("inf"):
            raise ValueError(
                "Uniform requires an upper bound. Make sure to set the "
                "`upper` parameter of `Float()`.")
        new = copy(self)
        new.set_sampler(self._Uniform())
        return new

    def loguniform(self, base: float = 10):
        if not self.lower > 0:
            raise ValueError(
                "LogUniform requires a lower bound greater than 0. "
                f"Got: {self.lower}. Did you pass a variable that has "
                "been log-transformed? If so, pass the non-transformed value "
                "instead.")
        if not 0 < self.upper < float("inf"):
            raise ValueError(
                "LogUniform requires an upper bound greater than 0. "
                f"Got: {self.upper}. Did you pass a variable that has "
                "been log-transformed? If so, pass the non-transformed value "
                "instead.")
        new = copy(self)
        new.set_sampler(self._LogUniform(base))
        return new

    def normal(self, mean=0., sd=1.):
        new = copy(self)
        new.set_sampler(self._Normal(mean, sd))
        return new

    def quantized(self, q: float):
        if self.lower > float("-inf") and not isclose(self.lower / q,
                                                      round(self.lower / q)):
            raise ValueError(
                f"Your lower variable bound {self.lower} is not divisible by "
                f"quantization factor {q}.")
        if self.upper < float("inf") and not isclose(self.upper / q,
                                                     round(self.upper / q)):
            raise ValueError(
                f"Your upper variable bound {self.upper} is not divisible by "
                f"quantization factor {q}.")

        new = copy(self)
        new.set_sampler(Quantized(new.get_sampler(), q), allow_override=True)
        return new

    def is_valid(self, value: float):
        return self.lower <= value <= self.upper

    @property
    def domain_str(self):
        return f"({self.lower}, {self.upper})"
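To make the fluent sampler API above concrete, a small hedged sketch using only the classes defined in this file: `Float` carries the bounds, the chained method attaches a sampler, and `sample()` delegates to it.

```python
# A log-uniform float domain between 1e-4 and 1e-1.
lr_domain = Float(1e-4, 1e-1).loguniform(base=10)
print(lr_domain.domain_str)         # (0.0001, 0.1)
print(lr_domain.is_valid(0.05))     # True
print(lr_domain.sample(size=3))     # three log-uniformly drawn floats
```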
class Integer(Domain):
    class _Uniform(Uniform):
        def sample(self,
                   domain: "Integer",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            items = np.random.randint(domain.lower, domain.upper, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

    class _LogUniform(LogUniform):
        def sample(self,
                   domain: "Integer",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            assert domain.lower > 0, \
                "LogUniform needs a lower bound greater than 0"
            assert 0 < domain.upper < float("inf"), \
                "LogUniform needs an upper bound greater than 0"
            logmin = np.log(domain.lower) / np.log(self.base)
            logmax = np.log(domain.upper) / np.log(self.base)

            items = self.base**(np.random.uniform(logmin, logmax, size=size))
            items = np.round(items).astype(int)
            return items if len(items) > 1 else domain.cast(items[0])

    default_sampler_cls = _Uniform

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def cast(self, value):
        return int(value)

    def quantized(self, q: int):
        new = copy(self)
        new.set_sampler(Quantized(new.get_sampler(), q), allow_override=True)
        return new

    def uniform(self):
        new = copy(self)
        new.set_sampler(self._Uniform())
        return new

    def loguniform(self, base: float = 10):
        if not self.lower > 0:
            raise ValueError(
                "LogUniform requires a lower bound greater than 0. "
                f"Got: {self.lower}. Did you pass a variable that has "
                "been log-transformed? If so, pass the non-transformed value "
                "instead.")
        if not 0 < self.upper < float("inf"):
            raise ValueError(
                "LogUniform requires an upper bound greater than 0. "
                f"Got: {self.upper}. Did you pass a variable that has "
                "been log-transformed? If so, pass the non-transformed value "
                "instead.")
        new = copy(self)
        new.set_sampler(self._LogUniform(base))
        return new

    def is_valid(self, value: int):
        return self.lower <= value <= self.upper

    @property
    def domain_str(self):
        return f"({self.lower}, {self.upper})"


class Categorical(Domain):
    class _Uniform(Uniform):
        def sample(self,
                   domain: "Categorical",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):

            items = random.choices(domain.categories, k=size)
            return items if len(items) > 1 else domain.cast(items[0])

    default_sampler_cls = _Uniform

    def __init__(self, categories: Sequence):
        self.categories = list(categories)

    def uniform(self):
        new = copy(self)
        new.set_sampler(self._Uniform())
        return new

    def grid(self):
        new = copy(self)
        new.set_sampler(Grid())
        return new

    def __len__(self):
        return len(self.categories)

    def __getitem__(self, item):
        return self.categories[item]

    def is_valid(self, value: Any):
        return value in self.categories

    @property
    def domain_str(self):
        return f"{self.categories}"


class Function(Domain):
    class _CallSampler(BaseSampler):
        def sample(self,
                   domain: "Function",
                   spec: Optional[Union[List[Dict], Dict]] = None,
                   size: int = 1):
            if domain.pass_spec:
                items = [
                    domain.func(spec[i] if isinstance(spec, list) else spec)
                    for i in range(size)
                ]
            else:
                items = [domain.func() for i in range(size)]

            return items if len(items) > 1 else domain.cast(items[0])

    default_sampler_cls = _CallSampler

    def __init__(self, func: Callable):
        sig = signature(func)

        pass_spec = True  # whether we should pass `spec` when calling `func`
        try:
            sig.bind({})
        except TypeError:
            pass_spec = False

        if not pass_spec:
            try:
                sig.bind()
            except TypeError as exc:
                raise ValueError(
                    "The function passed to a `Function` parameter must be "
                    "callable with either 0 or 1 parameters.") from exc

        self.pass_spec = pass_spec
        self.func = func

    def is_function(self):
        return True

    def is_valid(self, value: Any):
        return True  # This is user-defined, so let's not assume anything

    @property
    def domain_str(self):
        return f"{self.func}()"


class Quantized(Sampler):
    def __init__(self, sampler: Sampler, q: Union[float, int]):
        self.sampler = sampler
        self.q = q

        assert self.sampler, "Quantized() expects a sampler instance"

    def get_sampler(self):
        return self.sampler

    def sample(self,
               domain: Domain,
               spec: Optional[Union[List[Dict], Dict]] = None,
               size: int = 1):
        values = self.sampler.sample(domain, spec, size)
        quantized = np.round(np.divide(values, self.q)) * self.q
        if not isinstance(quantized, np.ndarray):
            return domain.cast(quantized)
        return list(quantized)
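The quantization applied by `Quantized.sample` above is plain round-to-nearest-multiple; a hedged sketch of the arithmetic on hand-picked values:

```python
import numpy as np

# np.round(value / q) * q snaps a sample to the nearest multiple of q.
q = 0.25
for value in (0.30, 0.40, 0.99):
    print(value, "->", np.round(value / q) * q)  # 0.25, 0.5, 1.0
```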
# TODO (krfricke): Remove tune.function
def function(func):
    logger.warning(
        "DeprecationWarning: wrapping {} with tune.function() is no "
        "longer needed".format(func))
    return func


def sample_from(func: Callable[[Dict], Any]):
    """Specify that tune should sample configuration values from this function.

    Arguments:
        func: A callable function to draw a sample from.
    """
    return Function(func)


def uniform(lower: float, upper: float):
    """Sample a float value uniformly between ``lower`` and ``upper``.

    Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from
    ``np.random.uniform(1, 10)``
    """
    return Float(lower, upper).uniform()


def quniform(lower: float, upper: float, q: float):
    """Sample a quantized float value uniformly between ``lower`` and ``upper``.

    Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from
    ``np.random.uniform(1, 10)``

    The value will be quantized, i.e. rounded to an integer increment of ``q``.
    Quantization makes the upper bound inclusive.
    """
    return Float(lower, upper).uniform().quantized(q)


def loguniform(lower: float, upper: float, base: float = 10):
    """Sugar for sampling in different orders of magnitude.

    Args:
        lower (float): Lower boundary of the output interval (e.g. 1e-4)
        upper (float): Upper boundary of the output interval (e.g. 1e-2)
        base (int): Base of the log. Defaults to 10.
    """
    return Float(lower, upper).loguniform(base)


def qloguniform(lower: float, upper: float, q: float, base: float = 10):
    """Sugar for sampling in different orders of magnitude.

    The value will be quantized, i.e. rounded to an integer increment of ``q``.
    Quantization makes the upper bound inclusive.

    Args:
        lower (float): Lower boundary of the output interval (e.g. 1e-4)
        upper (float): Upper boundary of the output interval (e.g. 1e-2)
        q (float): Quantization number. The result will be rounded to an
            integer increment of this value.
        base (int): Base of the log. Defaults to 10.
    """
    return Float(lower, upper).loguniform(base).quantized(q)


def choice(categories: List):
    """Sample a categorical value.

    Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from
    ``random.choice([1, 2])``
    """
    return Categorical(categories).uniform()


def randint(lower: int, upper: int):
    """Sample an integer value uniformly between ``lower`` and ``upper``.

    ``lower`` is inclusive, ``upper`` is exclusive.

    Sampling from ``tune.randint(0, 10)`` is equivalent to sampling from
    ``np.random.randint(0, 10)``
    """
    return Integer(lower, upper).uniform()


def lograndint(lower: int, upper: int, base: float = 10):
    """Sample an integer value log-uniformly between ``lower`` and ``upper``,
    with ``base`` being the base of logarithm.

    ``lower`` is inclusive, ``upper`` is exclusive.
    """
    return Integer(lower, upper).loguniform(base)


def qrandint(lower: int, upper: int, q: int = 1):
    """Sample an integer value uniformly between ``lower`` and ``upper``.

    ``lower`` is inclusive, ``upper`` is also inclusive (!).

    The value will be quantized, i.e. rounded to an integer increment of ``q``.
    Quantization makes the upper bound inclusive.
    """
    return Integer(lower, upper).uniform().quantized(q)


def qlograndint(lower: int, upper: int, q: int, base: float = 10):
    """Sample an integer value log-uniformly between ``lower`` and ``upper``,
    with ``base`` being the base of logarithm.

    ``lower`` is inclusive, ``upper`` is also inclusive (!).

    The value will be quantized, i.e. rounded to an integer increment of ``q``.
    Quantization makes the upper bound inclusive.
    """
    return Integer(lower, upper).loguniform(base).quantized(q)


def randn(mean: float = 0., sd: float = 1.):
    """Sample a float value normally with ``mean`` and ``sd``.

    Args:
        mean (float): Mean of the normal distribution. Defaults to 0.
        sd (float): SD of the normal distribution. Defaults to 1.
    """
    return Float(None, None).normal(mean, sd)


def qrandn(mean: float, sd: float, q: float):
    """Sample a float value normally with ``mean`` and ``sd``.

    The value will be quantized, i.e. rounded to an integer increment of ``q``.

    Args:
        mean (float): Mean of the normal distribution.
        sd (float): SD of the normal distribution.
        q (float): Quantization number. The result will be rounded to an
            integer increment of this value.
    """
    return Float(None, None).normal(mean, sd).quantized(q)
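Taken together, these helpers are how a search space is declared; a hedged sketch (the hyperparameter names are invented for illustration, and each domain can also be sampled directly):

```python
config = {
    "lr": loguniform(1e-4, 1e-1),          # float, log scale
    "num_layers": randint(1, 5),           # int in [1, 5)
    "dropout": quniform(0.0, 0.5, 0.05),   # float snapped to multiples of 0.05
    "optimizer": choice(["sgd", "adam"]),  # categorical
}
print({name: domain.sample() for name, domain in config.items()})
```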
@ -0,0 +1,143 @@
'''
Copyright 2020 The Ray Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

This source file is adapted here because ray does not fully support Windows.
'''
import uuid
import time
from numbers import Number
from collections import deque
import copy


def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
    dt = copy.deepcopy(dt)
    if prevent_delimiter and any(delimiter in key for key in dt):
        # Raise if delimiter is any of the keys
        raise ValueError(
            "Found delimiter `{}` in key when trying to flatten array. "
            "Please avoid using the delimiter in your "
            "specification.".format(delimiter))
    while any(isinstance(v, dict) for v in dt.values()):
        remove = []
        add = {}
        for key, value in dt.items():
            if isinstance(value, dict):
                for subkey, v in value.items():
                    if prevent_delimiter and delimiter in subkey:
                        # Raise if delimiter is in any of the subkeys
                        raise ValueError(
                            "Found delimiter `{}` in key when trying to "
                            "flatten array. Please avoid using the delimiter "
                            "in your specification.".format(delimiter))
                    add[delimiter.join([key, str(subkey)])] = v
                remove.append(key)
        dt.update(add)
        for k in remove:
            del dt[k]
    return dt


def unflatten_dict(dt, delimiter="/"):
    """Unflatten dict. Does not support unflattening lists."""
    dict_type = type(dt)
    out = dict_type()
    for key, val in dt.items():
        path = key.split(delimiter)
        item = out
        for k in path[:-1]:
            item = item.setdefault(k, dict_type())
        item[path[-1]] = val
    return out
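A quick hedged sketch of the round trip these two helpers provide (they are used below to turn nested result dicts into flat metric keys):

```python
nested = {"loss": 0.5, "info": {"epoch": 3, "timing": {"eval_s": 1.2}}}
flat = flatten_dict(nested)
print(flat)  # {'loss': 0.5, 'info/epoch': 3, 'info/timing/eval_s': 1.2}
print(unflatten_dict(flat) == nested)  # True
```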
class Trial:
    """A trial object holds the state for one model training run.

    Trials are themselves managed by the TrialRunner class, which implements
    the event loop for submitting trial runs to a Ray cluster.

    Trials start in the PENDING state, and transition to RUNNING once started.
    On error it transitions to ERROR, otherwise TERMINATED on success.

    Attributes:
        trainable_name (str): Name of the trainable object to be executed.
        config (dict): Provided configuration dictionary with evaluated params.
        trial_id (str): Unique identifier for the trial.
        local_dir (str): Local_dir as passed to tune.run.
        logdir (str): Directory where the trial logs are saved.
        evaluated_params (dict): Evaluated parameters by search algorithm.
        experiment_tag (str): Identifying trial name to show in the console.
        resources (Resources): Amount of resources that this trial will use.
        status (str): One of PENDING, RUNNING, PAUSED, TERMINATED, ERROR.
        error_file (str): Path to the errors that this trial has raised.
    """

    PENDING = "PENDING"
    RUNNING = "RUNNING"
    PAUSED = "PAUSED"
    TERMINATED = "TERMINATED"
    ERROR = "ERROR"

    @classmethod
    def generate_id(cls):
        return str(uuid.uuid1().hex)[:8]

    def update_last_result(self, result):
        if self.experiment_tag:
            result.update(experiment_tag=self.experiment_tag)

        self.last_result = result
        self.last_update_time = time.time()

        for metric, value in flatten_dict(result).items():
            if isinstance(value, Number):
                if metric not in self.metric_analysis:
                    self.metric_analysis[metric] = {
                        "max": value,
                        "min": value,
                        "avg": value,
                        "last": value
                    }
                    self.metric_n_steps[metric] = {}
                    for n in self.n_steps:
                        key = "last-{:d}-avg".format(n)
                        self.metric_analysis[metric][key] = value
                        # Store n as string for correct restore.
                        self.metric_n_steps[metric][str(n)] = deque(
                            [value], maxlen=n)
                else:
                    step = result.get("training_iteration") or 1
                    self.metric_analysis[metric]["max"] = max(
                        value, self.metric_analysis[metric]["max"])
                    self.metric_analysis[metric]["min"] = min(
                        value, self.metric_analysis[metric]["min"])
                    self.metric_analysis[metric]["avg"] = 1 / step * (
                        value +
                        (step - 1) * self.metric_analysis[metric]["avg"])
                    self.metric_analysis[metric]["last"] = value

                    for n in self.n_steps:
                        key = "last-{:d}-avg".format(n)
                        self.metric_n_steps[metric][str(n)].append(value)
                        self.metric_analysis[metric][key] = sum(
                            self.metric_n_steps[metric][str(n)]) / len(
                                self.metric_n_steps[metric][str(n)])

    def set_status(self, status):
        """Sets the status of the trial."""
        self.status = status
        if status == Trial.RUNNING:
            if self.start_time is None:
                self.start_time = time.time()

    def is_finished(self):
        return self.status in [Trial.ERROR, Trial.TERMINATED]
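For concreteness, a hedged sketch of the bookkeeping `update_last_result` maintains, using the `SimpleTrial` subclass from the next file in this diff (the metric name is illustrative):

```python
trial = SimpleTrial(config={"x": 1})
for i, loss in enumerate([0.9, 0.7, 0.6], start=1):
    trial.update_last_result({"loss": loss, "training_iteration": i})
print(trial.metric_analysis["loss"]["last"])        # 0.6
print(trial.metric_analysis["loss"]["min"])         # 0.6
print(trial.metric_analysis["loss"]["last-5-avg"])  # mean of the last <=5 values
```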
@ -0,0 +1,121 @@
'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Optional
try:
    from ray.tune.trial import Trial
except ImportError:
    from .trial import Trial
import logging
logger = logging.getLogger(__name__)


class Nologger:
    '''Logger without logging
    '''

    def on_result(self, result):
        pass


class SimpleTrial(Trial):
    '''A simple trial class
    '''

    def __init__(self, config, trial_id=None):
        self.trial_id = Trial.generate_id() if trial_id is None else trial_id
        self.config = config or {}
        self.status = Trial.PENDING
        self.start_time = None
        self.last_result = {}
        self.last_update_time = -float("inf")
        self.custom_trial_name = None
        self.trainable_name = "trainable"
        self.experiment_tag = "exp"
        self.verbose = False
        self.result_logger = Nologger()
        self.metric_analysis = {}
        self.n_steps = [5, 10]
        self.metric_n_steps = {}


class BaseTrialRunner:
    """Implementation of a simple trial runner

    Note that the caller usually should not mutate trial state directly.
    """

    def __init__(self,
                 search_alg=None,
                 scheduler=None,
                 metric: Optional[str] = None,
                 mode: Optional[str] = 'min'):
        self._search_alg = search_alg
        self._scheduler_alg = scheduler
        self._trials = []
        self._metric = metric
        self._mode = mode

    def get_trials(self):
        """Returns the list of trials managed by this TrialRunner.

        Note that the caller usually should not mutate trial state directly.
        """
        return self._trials

    def add_trial(self, trial):
        """Adds a new trial to this TrialRunner.

        Trials may be added at any time.

        Args:
            trial (Trial): Trial to queue.
        """
        self._trials.append(trial)
        if self._scheduler_alg:
            self._scheduler_alg.on_trial_add(self, trial)

    def process_trial_result(self, trial, result):
        trial.update_last_result(result)
        self._search_alg.on_trial_result(trial.trial_id, result)
        if self._scheduler_alg:
            decision = self._scheduler_alg.on_trial_result(self, trial, result)
            if decision == "STOP":
                trial.set_status(Trial.TERMINATED)
            elif decision == "PAUSE":
                trial.set_status(Trial.PAUSED)

    def stop_trial(self, trial):
        """Stops trial."""
        if trial.status not in [Trial.ERROR, Trial.TERMINATED]:
            if self._scheduler_alg:
                self._scheduler_alg.on_trial_complete(
                    self, trial.trial_id, trial.last_result)
            self._search_alg.on_trial_complete(
                trial.trial_id, trial.last_result)
            trial.set_status(Trial.TERMINATED)
        else:
            if self._scheduler_alg:
                self._scheduler_alg.on_trial_remove(self, trial)


class SequentialTrialRunner(BaseTrialRunner):
    """Implementation of the sequential trial runner
    """

    def step(self) -> Trial:
        """Runs one step of the trial event loop.

        Callers should typically run this method repeatedly in a loop. They
        may inspect or modify the runner's state in between calls to step().

        Returns a Trial to run.
        """
        trial_id = Trial.generate_id()
        config = self._search_alg.suggest(trial_id)
        if config:
            trial = SimpleTrial(config, trial_id)
            self.add_trial(trial)
            trial.set_status(Trial.RUNNING)
        else:
            trial = None
        self.running_trial = trial
        return trial
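A hedged sketch of the event loop these pieces form; this is essentially what `tune.run` (next file) does when `use_ray=False`. `search_alg` is assumed to be any object with the `suggest`/`on_trial_result`/`on_trial_complete` interface, and `evaluate` is a hypothetical objective function:

```python
runner = SequentialTrialRunner(search_alg=search_alg, metric="loss", mode="min")
for _ in range(10):                        # try up to 10 configs
    trial = runner.step()                  # ask the searcher for a config
    if trial is None:                      # nothing more to suggest
        break
    loss = evaluate(trial.config)          # hypothetical objective function
    runner.process_trial_result(
        trial, {"loss": loss, "training_iteration": 1})
    runner.stop_trial(trial)               # finalize and notify the searcher
```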
@ -0,0 +1,295 @@
'''!
 * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Optional, Union
import datetime
import time
try:
    from ray.tune.analysis import ExperimentAnalysis as EA
except ImportError:
    from .analysis import ExperimentAnalysis as EA

import logging
logger = logging.getLogger(__name__)


_use_ray = True
_runner = None
_verbose = 0


class ExperimentAnalysis(EA):
    '''Class for storing the experiment results
    '''

    def __init__(self, trials, metric, mode):
        try:
            super().__init__(None, trials, metric, mode)
        except Exception:
            self.trials = trials
            self.default_metric = metric
            self.default_mode = mode


def report(_metric=None, **kwargs):
    '''A function called by the HPO application to report final or intermediate
    results.

    Example:

    .. code-block:: python

        import time
        from flaml import tune

        def compute_with_config(config):
            current_time = time.time()
            metric2minimize = (round(config['x'])-95000)**2
            time2eval = time.time() - current_time
            tune.report(metric2minimize=metric2minimize, time2eval=time2eval)

        analysis = tune.run(
            compute_with_config,
            init_config={},
            config={
                'x': tune.qloguniform(lower=1, upper=1000000, q=1),
                'y': tune.randint(lower=1, upper=1000000)
            },
            metric='metric2minimize', mode='min',
            num_samples=1000000, time_budget_s=60, use_ray=False)

        print(analysis.trials[-1].last_result)

    Args:
        _metric: Optional default anonymous metric for ``tune.report(value)``.
            (For compatibility with ray.tune.report)
        **kwargs: Any key value pair to be reported.
    '''
    global _use_ray
    global _verbose
    if _use_ray:
        from ray import tune
        return tune.report(_metric, **kwargs)
    else:
        result = kwargs
        if _verbose == 2:
            logger.info(f"result: {kwargs}")
        if _metric:
            result['_default_anonymous_metric'] = _metric
        trial = _runner.running_trial
        result['config'] = trial.config
        for key, value in trial.config.items():
            result['config/' + key] = value
        _runner.process_trial_result(_runner.running_trial, result)
        result['time_total_s'] = trial.last_update_time - trial.start_time
        if _verbose > 2:
            logger.info(f"result: {result}")
        if _runner.running_trial.is_finished():
            return None
        else:
            return True


def run(training_function,
        init_config: dict,
        config: Optional[dict] = None,
        cat_hp_cost: Optional[dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        time_budget_s: Union[int, float, datetime.timedelta] = None,
        prune_attr: Optional[str] = None,
        min_resource: Optional[float] = None,
        max_resource: Optional[float] = None,
        reduction_factor: Optional[float] = None,
        report_intermediate_result: Optional[bool] = False,
        search_alg=None,
        verbose: Optional[int] = 2,
        local_dir: Optional[str] = None,
        num_samples: Optional[int] = 1,
        resources_per_trial: Optional[dict] = None,
        mem_size=None,
        use_ray: Optional[bool] = False,
        ):
    '''The trigger for HPO.

    Example:

    .. code-block:: python

        import time
        from flaml import tune

        def compute_with_config(config):
            current_time = time.time()
            metric2minimize = (round(config['x'])-95000)**2
            time2eval = time.time() - current_time
            tune.report(metric2minimize=metric2minimize, time2eval=time2eval)

        analysis = tune.run(
            compute_with_config,
            init_config={},
            config={
                'x': tune.qloguniform(lower=1, upper=1000000, q=1),
                'y': tune.randint(lower=1, upper=1000000)
            },
            metric='metric2minimize', mode='min',
            num_samples=-1, time_budget_s=60, use_ray=False)

        print(analysis.trials[-1].last_result)

    Args:
        training_function: A user-defined training function.
        init_config: A dictionary from a subset of controlled dimensions
            to the initial low-cost values. e.g.,

            .. code-block:: python

                {'epochs': 1}

            If no such dimension, pass an empty dict {}.
        config: A dictionary to specify the search space.
        cat_hp_cost: A dictionary from a subset of categorical dimensions
            to the relative cost of each choice. e.g.,

            .. code-block:: python

                {'tree_method': [1, 1, 2]}

            i.e., the relative cost of the
            three choices of 'tree_method' is 1, 1 and 2 respectively.
        metric: A string of the metric name to optimize for.
        mode: A string in ['min', 'max'] to specify the objective as
            minimization or maximization.
        time_budget_s: A float of the time budget in seconds.
        prune_attr: A string of the attribute used for pruning.
            Not necessarily in space.
            When prune_attr is in space, it is a hyperparameter, e.g.,
            'n_iters', and the best value is unknown.
            When prune_attr is not in space, it is a resource dimension,
            e.g., 'sample_size', and the peak performance is assumed
            to be at the max_resource.
        min_resource: A float of the minimal resource to use for the
            prune_attr; only valid if prune_attr is not in space.
        max_resource: A float of the maximal resource to use for the
            prune_attr; only valid if prune_attr is not in space.
        reduction_factor: A float of the reduction factor used for incremental
            pruning.
        report_intermediate_result: A boolean of whether intermediate results
            are reported. If so, early stopping and pruning can be used.
        search_alg: An instance of BlendSearch as the search algorithm
            to be used. The same instance can be used for iterative tuning.
            e.g.,

            .. code-block:: python

                from flaml import BlendSearch
                algo = BlendSearch(metric='val_loss', mode='min',
                                   space=search_space,
                                   points_to_evaluate=points_to_evaluate)
                for i in range(10):
                    analysis = tune.run(compute_with_config, init_config=None,
                                        search_alg=algo, use_ray=False)
                    print(analysis.trials[-1].last_result)

        verbose: 0, 1, 2, or 3. Verbosity mode for ray if ray backend is used.
            0 = silent, 1 = only status updates, 2 = status and brief trial
            results, 3 = status and detailed trial results. Defaults to 2.
        local_dir: A string of the local dir to save ray logs if ray backend is
            used.
        num_samples: An integer of the number of configs to try. Defaults to 1.
        resources_per_trial: A dictionary of the hardware resources to allocate
            per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
            only 'mem' is used as approximate resource constraints
            (in conjunction with mem_size).
        mem_size: A function to estimate the memory size for a given config.
            It is used to skip configs which do not fit in memory.
        use_ray: A boolean of whether to use ray as the backend.
    '''
    global _use_ray
    global _verbose
    if not use_ray:
        _verbose = verbose
        if verbose > 0:
            import os
            os.makedirs(local_dir, exist_ok=True)
            logger.addHandler(logging.FileHandler(local_dir + '/tune_' + str(
                datetime.datetime.now()) + '.log'))
            if verbose <= 2:
                logger.setLevel(logging.INFO)
            else:
                logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.CRITICAL)

    if search_alg is None:
        from ..searcher.blendsearch import BlendSearch
        search_alg = BlendSearch(points_to_evaluate=[init_config],
                                 metric=metric, mode=mode,
                                 cat_hp_cost=cat_hp_cost,
                                 space=config, prune_attr=prune_attr,
                                 min_resource=min_resource,
                                 max_resource=max_resource,
                                 reduction_factor=reduction_factor,
                                 resources_per_trial=resources_per_trial,
                                 mem_size=mem_size)
    if time_budget_s:
        search_alg.set_search_properties(metric, mode, config={
            'time_budget_s': time_budget_s})
    if report_intermediate_result:
        params = {}
        # scheduler resource_dimension=prune_attr
        if prune_attr:
            params['time_attr'] = prune_attr
        if max_resource:
            params['max_t'] = max_resource
        if min_resource:
            params['grace_period'] = min_resource
        if reduction_factor:
            params['reduction_factor'] = reduction_factor
        try:
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(**params)
        except ImportError:
            scheduler = None
    else:
        scheduler = None

    if use_ray:
        try:
            from ray import tune
        except ImportError:
            raise ImportError("Failed to import ray tune. "
                              "Please install ray[tune] or set use_ray=False")
        _use_ray = True
        return tune.run(training_function,
                        metric=metric,
                        mode=mode,
                        search_alg=search_alg,
                        scheduler=scheduler,
                        time_budget_s=time_budget_s,
                        verbose=verbose,
                        local_dir=local_dir,
                        num_samples=num_samples,
                        resources_per_trial=resources_per_trial
                        )

    # simple sequential run without using tune.run() from ray
    time_start = time.time()
    _use_ray = False
    if time_budget_s is None:
        # no explicit budget: rely on num_samples alone
        time_budget_s = float("inf")
    if scheduler:
        scheduler.set_search_properties(metric=metric, mode=mode)
    from .trial_runner import SequentialTrialRunner
    global _runner
    _runner = SequentialTrialRunner(
        search_alg=search_alg,
        scheduler=scheduler,
        metric=metric,
        mode=mode,
    )
    num_trials = 0
    while time.time() - time_start < time_budget_s and (
            num_samples < 0 or num_trials < num_samples):
        trial_to_run = _runner.step()
        if trial_to_run:
            num_trials += 1
            if verbose:
                logger.info(f'trial {num_trials} config: {trial_to_run.config}')
            training_function(trial_to_run.config)
            _runner.stop_trial(trial_to_run)
        else:
            break  # the search algorithm has no more configs to suggest
    return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
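As a complement to the docstring examples, a hedged sketch of the resource-dimension reading of `prune_attr` described above: since `sample_size` is not in the search space, the searcher is expected to inject it into each suggested config. The names `sample_size` and `evaluate_on_subsample` are illustrative only:

```python
from flaml import tune

def train_with_config(config):
    n = int(config['sample_size'])           # resource injected by the searcher
    loss = evaluate_on_subsample(config, n)  # hypothetical helper
    tune.report(val_loss=loss)

analysis = tune.run(
    train_with_config, init_config={},
    config={'x': tune.loguniform(1, 1000)},
    prune_attr='sample_size', min_resource=1000, max_resource=100000,
    reduction_factor=2, metric='val_loss', mode='min',
    num_samples=50, time_budget_s=60, use_ray=False)
```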
@ -1 +1 @@
__version__ = "0.1.3"
__version__ = "0.2.2"
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
15 setup.py
@ -19,7 +19,8 @@ install_requires = [
    "xgboost>=0.90",
    "scipy>=1.4.1",
    "catboost>=0.23",
    "scikit-learn>=0.23",
    "scikit-learn>=0.23.2",
    "optuna==2.3.0"
],

@ -32,7 +33,7 @@ setuptools.setup(
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/microsoft/FLAML",
    packages=["flaml"],
    packages=setuptools.find_packages(),
    install_requires=install_requires,
    extras_require={
        "notebook": [

@ -45,7 +46,17 @@ setuptools.setup(
            "flake8>=3.8.4",
            "pytest>=6.1.1",
            "coverage>=5.3",
            "xgboost<1.3",
            "rgf-python",
            # "hpbandster",
            # "torchvision"
        ],
        "ray": [
            "ray[tune]==1.1.0",
            "pyyaml<5.3.1",
        ],
        "azureml": [
            "azureml-mlflow"
        ],
    },
    classifiers=[
@ -7,58 +7,74 @@ from sklearn.datasets import load_boston, load_iris, load_wine
from flaml import AutoML
from flaml.data import get_output_from_log

from flaml.model import BaseEstimator
from flaml.space import ConfigSearchInfo
from flaml.model import SKLearnEstimator
from rgf.sklearn import RGFClassifier, RGFRegressor
from flaml import tune


class MyRegularizedGreedyForest(BaseEstimator):
class MyRegularizedGreedyForest(SKLearnEstimator):

    # search space
    params_configsearch_info = {
        'max_leaf': ConfigSearchInfo(name = 'max_leaf',
            type = int, lower = 4, init = 4, upper = 10000),
        'n_iter': ConfigSearchInfo(name = 'n_iter', type = int, lower = 1,
            init = 1, upper = 32768),
        'n_tree_search': ConfigSearchInfo(name = 'n_tree_search', type = int,
            lower = 1, init = 1, upper = 32768),
        'opt_interval': ConfigSearchInfo(name = 'opt_interval', type = int,
            lower = 1, init = 100, upper = 10000),
        'learning_rate': ConfigSearchInfo(name = 'learning_rate', type = float,
            lower = 0.01, init = 1.0, upper = 20.0),
        'min_samples_leaf': ConfigSearchInfo(name = 'min_samples_leaf',
            type = int, lower = 1, init = 20, upper = 20)
    }

    def __init__(self, objective_name = 'binary:logistic', n_jobs = 1,
        max_leaf = 1000, n_iter = 1, n_tree_search = 1, opt_interval = 1,
        learning_rate = 1.0, min_samples_leaf = 1):

        self.objective_name = objective_name
    def __init__(self, task = 'binary:logistic', n_jobs = 1, max_leaf = 4,
        n_iter = 1, n_tree_search = 1, opt_interval = 1, learning_rate = 1.0,
        min_samples_leaf = 1, **params):

        if 'regression' in objective_name:
        super().__init__(task, **params)

        if 'regression' in task:
            self.estimator_class = RGFRegressor
        else:
            self.estimator_class = RGFClassifier

        # round integer hyperparameters
        self.params = {
            "n_jobs": n_jobs,
            'max_leaf': int(round(max_leaf)),
            'n_iter': int(round(n_iter)),
            'n_tree_search': int(round(n_tree_search)),
            'opt_interval': int(round(opt_interval)),
            'learning_rate': learning_rate,
            'min_samples_leaf': int(round(min_samples_leaf)),
            "n_jobs": n_jobs,
        }
            'min_samples_leaf': int(round(min_samples_leaf))
        }

    @classmethod
    def search_space(cls, data_size, task):
        space = {
            'max_leaf': {'domain': tune.qloguniform(
                lower = 4, upper = data_size, q = 1), 'init_value': 4},
            'n_iter': {'domain': tune.qloguniform(
                lower = 1, upper = data_size, q = 1), 'init_value': 1},
            'n_tree_search': {'domain': tune.qloguniform(
                lower = 1, upper = 32768, q = 1), 'init_value': 1},
            'opt_interval': {'domain': tune.qloguniform(
                lower = 1, upper = 10000, q = 1), 'init_value': 100},
            'learning_rate': {'domain': tune.loguniform(
                lower = 0.01, upper = 20.0)},
            'min_samples_leaf': {'domain': tune.qloguniform(
                lower = 1, upper = 20, q = 1), 'init_value': 20},
        }
        return space

    @classmethod
    def size(cls, config):
        max_leaves = int(round(config['max_leaf']))
        n_estimators = int(round(config['n_iter']))
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        return 1.0
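To make the memory estimate above concrete, a hedged sketch of evaluating `size()` by hand (numbers chosen for illustration):

```python
config = {'max_leaf': 100, 'n_iter': 10}
# (100*3 + (100-1)*4 + 1.0) * 10 * 8 = 697 * 80 = 55760 bytes
print(MyRegularizedGreedyForest.size(config))  # 55760.0
```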
def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                  weight_test=None, weight_train=None):
    from sklearn.metrics import log_loss
    y_pred = estimator.predict_proba(X_test)
    test_loss = log_loss(y_test, y_pred, labels=labels)
    test_loss = log_loss(y_test, y_pred, labels=labels,
                         sample_weight=weight_test)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels)
    train_loss = log_loss(y_train, y_pred, labels=labels,
                          sample_weight=weight_train)
    alpha = 0.5
    return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]
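For context, a hedged sketch of how a callable like this is wired in, assuming `AutoML.fit` accepts a callable `metric` (which is what the `test_custom_metric` hunk below exercises; the exact settings are illustrative):

```python
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train,
           metric=custom_metric,  # callable instead of a builtin metric name
           task='classification', time_budget=10,
           log_file_name='test/iris_custom.log')
```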
@ -77,6 +93,27 @@ class TestAutoML(unittest.TestCase):
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "n_jobs": 1,
        }

        '''The main flaml automl API'''
        automl.fit(X_train = X_train, y_train = y_train, **settings)

    def test_ensemble(self):
        automl = AutoML()
        automl.add_learner(learner_name = 'RGF',
            learner_class = MyRegularizedGreedyForest)
        X_train, y_train = load_wine(return_X_y=True)
        settings = {
            "time_budget": 10,  # total running time in seconds
            # "estimator_list": ['lgbm', 'xgboost'],
            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
            "task": 'classification',  # task type
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "ensemble": True,
            "n_jobs": 1,
        }

        '''The main flaml automl API'''

@ -87,6 +124,7 @@ class TestAutoML(unittest.TestCase):

    def test_custom_metric(self):

        X_train, y_train = load_iris(return_X_y=True)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,

@ -96,9 +134,10 @@ class TestAutoML(unittest.TestCase):
            "log_file_name": "test/iris_custom.log",
            "log_training_metric": True,
            'log_type': 'all',
            "model_history": True
            "n_jobs": 1,
            "model_history": True,
            "sample_weight": np.ones(len(y_train)),
        }
        X_train, y_train = load_iris(return_X_y=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)

@ -111,7 +150,7 @@ class TestAutoML(unittest.TestCase):
        automl_experiment = AutoML()
        estimator = automl_experiment.get_estimator_from_log(
            automl_settings["log_file_name"], record_id=0,
            objective='multi')
            task='multi')
        print(estimator)
        time_history, best_valid_loss_history, valid_loss_history, \
            config_history, train_loss_history = get_output_from_log(

@ -127,6 +166,7 @@ class TestAutoML(unittest.TestCase):
            "task": 'classification',
            "log_file_name": "test/iris.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)

@ -160,6 +200,7 @@ class TestAutoML(unittest.TestCase):
            "task": 'regression',
            "log_file_name": "test/boston.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
        }
        X_train, y_train = load_boston(return_X_y=True)

@ -167,7 +208,7 @@ class TestAutoML(unittest.TestCase):
        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
                              X_val=X_train[n:], y_val=y_train[n:],
                              **automl_settings)
        assert automl_experiment.eval_method == 'holdout'
        assert automl_experiment._state.eval_method == 'holdout'
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)

@ -185,6 +226,7 @@ class TestAutoML(unittest.TestCase):
            "task": 'classification',
            "log_file_name": "test/sparse_classification.log",
            "split_type": "uniform",
            "n_jobs": 1,
            "model_history": True
        }
        X_train = scipy.sparse.random(1554, 21, dtype=int)

@ -207,6 +249,7 @@ class TestAutoML(unittest.TestCase):
            "metric": 'mae',
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True
        }
        X_train = scipy.sparse.random(300, 900, density=0.0001)

@ -216,7 +259,7 @@ class TestAutoML(unittest.TestCase):
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              X_val=X_val, y_val=y_val,
                              **automl_settings)
        assert automl_experiment.X_val.shape == X_val.shape
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)

@ -237,6 +280,7 @@ class TestAutoML(unittest.TestCase):
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)

@ -259,6 +303,7 @@ class TestAutoML(unittest.TestCase):
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["lrl1", "lrl2"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.random(3000, 900, density=0.1)
        y_train = np.random.randint(2, size=3000)

@ -279,6 +324,7 @@ class TestAutoML(unittest.TestCase):
            'eval_method': 'cv',
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True
        }
        X_train = scipy.sparse.random(100, 100)

@ -28,11 +28,12 @@ class TestLogging(unittest.TestCase):
        # Run a simple job.
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "time_budget": 1,
            "metric": 'mse',
            "task": 'regression',
            "log_file_name": training_log,
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
        }
        X_train, y_train = load_boston(return_X_y=True)
@ -0,0 +1,351 @@
import unittest
import os
import time

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_pytorch_cifar10.log'))

||||
|
||||
import numpy as np
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import random_split
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
# __net_begin__
|
||||
class Net(nn.Module):
|
||||
def __init__(self, l1=120, l2=84):
|
||||
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, l1)
            self.fc2 = nn.Linear(l1, l2)
            self.fc3 = nn.Linear(l2, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 5 * 5)  # flatten the conv feature maps
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x
    # __net_end__
except ImportError:
    print("skip test_pytorch because torchvision cannot be imported.")


# __load_data_begin__
def load_data(data_dir="test/data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)

    return trainset, testset
# __load_data_end__


# __train_begin__
def train_cifar(config, checkpoint_dir=None, data_dir=None):
    if "l1" not in config:
        logger.warning(config)
    net = Net(2 ** config["l1"], 2 ** config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)

    # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
    # should be restored.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainset, testset = load_data(data_dir)

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs])

    trainloader = torch.utils.data.DataLoader(
        train_subset,
        batch_size=int(2 ** config["batch_size"]),
        shuffle=True,
        num_workers=4)
    valloader = torch.utils.data.DataLoader(
        val_subset,
        batch_size=int(2 ** config["batch_size"]),
        shuffle=True,
        num_workers=4)

    # Import inside the trainable so the function stays self-contained when
    # it is serialized to Ray workers.
    from ray import tune

    for epoch in range(int(round(config["num_epochs"]))):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        # Here we save a checkpoint. It is automatically registered with
        # Ray Tune and will potentially be passed as the `checkpoint_dir`
        # parameter in future iterations.
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save(
                (net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")
# __train_end__
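

# The trainable above follows Ray Tune's function API: save a checkpoint via
# tune.checkpoint_dir and report metrics once per resource unit (here, per
# epoch) via tune.report. A minimal sketch of that contract on a toy
# objective, assuming ray[tune] is installed (the name and objective are
# illustrative, not part of the original test):
#
#     def toy_trainable(config):
#         from ray import tune
#         for step in range(3):
#             tune.report(loss=(config["x"] - 2) ** 2 + 1 / (step + 1))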


# __test_acc_begin__
def _test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total
# __test_acc_end__


# __main_begin__
def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
                 gpus_per_trial=2):
    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    # The two branches use different upper bounds to match each library's
    # randint convention for the bound.
    if method in ['BlendSearch', 'BOHB', 'Optuna']:
        config = {
            "l1": tune.randint(2, 8),
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4)  # tune.choice([2, 4, 8, 16])
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
            "batch_size": tune.randint(1, 5)  # tune.choice([2, 4, 8, 16])
        }
    import ray
    time_budget_s = 3600
    start_time = time.time()
    if method == 'BlendSearch':
        result = tune.run(
            ray.tune.with_parameters(train_cifar, data_dir=data_dir),
            init_config={
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            },
            metric="loss",
            mode="min",
            max_resource=max_num_epochs,
            min_resource=1,
            report_intermediate_result=True,
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
            config=config,
            local_dir='logs/',
            num_samples=num_samples,
            time_budget_s=time_budget_s,
            use_ray=True)
    else:
        if 'ASHA' == method:
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB()
            scheduler = HyperBandForBOHB(max_t=max_num_epochs)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            }])
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(
                max_t=max_num_epochs,
                grace_period=1)
        result = tune.run(
            tune.with_parameters(train_cifar, data_dir=data_dir),
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
            config=config,
            metric="loss",
            mode="min",
            num_samples=num_samples, time_budget_s=time_budget_s,
            scheduler=scheduler, search_alg=algo
        )
    ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"n_samples={num_samples}")
    logger.info(f"time={time.time() - start_time}")
    best_trial = result.get_best_trial("loss", "min", "all")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.metric_analysis["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_trial.metric_analysis["accuracy"]["max"]))

    best_trained_model = Net(2 ** best_trial.config["l1"],
                             2 ** best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")

    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))
# __main_end__
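

# A minimal smoke run of the entry point above on a CPU-only machine
# (illustrative values, not the benchmark settings used by the tests below):
#
#     cifar10_main('CFO', num_samples=2, max_num_epochs=2, gpus_per_trial=0)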


# The helpers below are prefixed with an underscore so that default test
# discovery skips these long-running benchmark configurations.
gpus_per_trial = 0  # .5
num_samples = 500


def _test_cifar10_bs():
    cifar10_main(num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_cfo():
    cifar10_main('CFO',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_optuna():
    cifar10_main('Optuna',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_asha():
    cifar10_main('ASHA',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_bohb():
    cifar10_main('BOHB',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_nevergrad():
    cifar10_main('Nevergrad',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


if __name__ == "__main__":
    unittest.main()
@@ -23,6 +23,8 @@ class TestTrainingLog(unittest.TestCase):
            "task": 'regression',
            "log_file_name": filename,
            "log_training_metric": True,
            "mem_thres": 1024 * 1024,
            "n_jobs": 1,
            "model_history": True
        }
        X_train, y_train = load_boston(return_X_y=True)
@@ -0,0 +1,200 @@
import unittest
import os
import time
from sklearn.model_selection import train_test_split
import sklearn.metrics
import sklearn.datasets
try:
    from ray.tune.integration.xgboost import TuneReportCheckpointCallback
except ImportError:
    print("skip test_tune because ray tune cannot be imported.")
import xgboost as xgb

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))


def train_breast_cancer(config: dict):
    # This is a simple training function to be passed into Tune
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.25)
    # Build input matrices for XGBoost
    train_set = xgb.DMatrix(train_x, label=train_y)
    test_set = xgb.DMatrix(test_x, label=test_y)
    # Copy the config so the additions below do not mutate the dict owned
    # by the search algorithm
    config = config.copy()
    config["eval_metric"] = ["logloss", "error"]
    config["objective"] = "binary:logistic"
    # Train the classifier, using the Tune callback
    xgb.train(
        config,
        train_set,
        evals=[(test_set, "eval")],
        verbose_eval=False,
        callbacks=[TuneReportCheckpointCallback(filename="model.xgb")])
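

# Note on metric names: XGBoost prefixes each eval_metric with the name given
# in `evals`, so ("eval", ["logloss", "error"]) produces the keys
# "eval-logloss" and "eval-error" that TuneReportCheckpointCallback forwards
# to Tune. A minimal sketch of the same naming without Tune (evals_result is
# a plain dict that xgboost fills in):
#
#     res = {}
#     xgb.train(config, train_set, evals=[(test_set, "eval")],
#               evals_result=res)  # res["eval"]["logloss"] is a list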


def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    # Relative evaluation cost of the `min_child_weight`
                    # choices [1, 2, 3], in the same order
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                scheduler = None  # BOHB installs its own scheduler below
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples * n_cpu, time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples * n_cpu}")
            logger.info(f"time={time.time() - start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")


def test_xgboost_bs():
    _test_xgboost()


def test_xgboost_cfo():
    _test_xgboost('CFO')


def _test_xgboost_dragonfly():
    _test_xgboost('Dragonfly')


def _test_xgboost_skopt():
    _test_xgboost('SkOpt')


def _test_xgboost_nevergrad():
    _test_xgboost('Nevergrad')


def _test_xgboost_zoopt():
    _test_xgboost('ZOOpt')


def _test_xgboost_ax():
    _test_xgboost('Ax')


def __test_xgboost_hyperopt():
    _test_xgboost('HyperOpt')


def _test_xgboost_optuna():
    _test_xgboost('Optuna')


def _test_xgboost_asha():
    _test_xgboost('ASHA')


def _test_xgboost_bohb():
    _test_xgboost('BOHB')


if __name__ == "__main__":
    unittest.main()
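

# Only test_xgboost_bs and test_xgboost_cfo are collected by default; the
# underscore-prefixed variants are opt-in benchmarks and can be invoked
# individually, e.g. via pytest's keyword filter:
#
#     pytest -k test_xgboost_cfo -s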
@@ -0,0 +1,69 @@
import unittest

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from flaml.automl import AutoML
from flaml.model import XGBoostSklearnEstimator
from flaml import tune


# dataset = "blood-transfusion-service-center"
# dataset = "Australian"
dataset = "credit-g"
# dataset = "phoneme"
# dataset = "kc1"


class XGBoost2D(XGBoostSklearnEstimator):

    @classmethod
    def search_space(cls, data_size, task):
        upper = min(32768, int(data_size))
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
            'max_leaves': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
        }


def test_simple(method=None):
    automl = AutoML()
    automl.add_learner(learner_name='XGBoost2D',
                       learner_class=XGBoost2D)

    automl_settings = {
        "estimator_list": ['XGBoost2D'],
        # "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        # "model_history": True,
        # "log_training_metric": True,
        # "split_type": split_type,
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "time_budget": 3,  # 6000
    }

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)


def _test_optuna():
    test_simple(method="optuna")


def test_grid():
    test_simple(method="grid")


if __name__ == "__main__":
    unittest.main()
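

# The search_space schema above is how a custom flaml learner declares its
# hyperparameters: each name maps to a dict with a 'domain' (a flaml.tune
# sampler) and an 'init_value' (the low-cost starting point for the search).
# A sketch of one more dimension (hypothetical, not part of this test):
#
#     'min_child_weight': {
#         'domain': tune.loguniform(lower=0.001, upper=128),
#         'init_value': 1,
#     },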