* unordered categorical

* allow cost attribute to be None

* tensorboardX version

* quote

* cfo cat

* trunc

* Update version.py

* incumbent is normalized

* python 3.9

* remove ConcurrencyLimiter

* seed

* estimator

* update autovw notebook

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
Qingyun Wu committed on 2021-07-05 21:17:26 -04:00 (committed via GitHub)
parent e41b42842a
commit a291abfab9
10 changed files with 177 additions and 74 deletions

View File

@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-2019]
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8, 3.9]
     steps:
     - uses: actions/checkout@v2
@@ -39,9 +39,10 @@ jobs:
         python -m pip install --upgrade pip
         pip install -e .[test]
     - name: If linux or mac, install ray
-      if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
+      if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9'
       run: |
         pip install -e .[ray]
+        pip install 'tensorboardX<=2.2'
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

View File

@@ -1,6 +1,6 @@
 [![PyPI version](https://badge.fury.io/py/FLAML.svg)](https://badge.fury.io/py/FLAML)
 [![Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml/badge.svg)](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
-![Python Version](https://img.shields.io/badge/3.6%20%7C%203.7%20%7C%203.8-blue)
+![Python Version](https://img.shields.io/badge/3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
 [![Downloads](https://pepy.tech/badge/flaml/month)](https://pepy.tech/project/flaml)
 [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

View File

@@ -981,15 +981,9 @@ class AutoML:
         self._retrained_config = {}
         est_retrain_time = next_trial_time = 0
         best_config_sig = None
-        # use ConcurrencyLimiter to limit the amount of concurrency when
-        # using a search algorithm
         better = True  # whether we find a better model in one trial
         if self._ensemble:
             self.best_model = {}
-        try:
-            from ray.tune.suggest import ConcurrencyLimiter
-        except ImportError:
-            from .searcher.suggestion import ConcurrencyLimiter
         if self._hpo_method in ('cfo', 'grid'):
             from flaml import CFO as SearchAlgo
         elif 'optuna' == self._hpo_method:
@@ -1062,12 +1056,11 @@
                     metric='val_loss', mode='min', space=search_space,
                     points_to_evaluate=points_to_evaluate,
                 )
-                search_state.search_alg = ConcurrencyLimiter(algo,
-                                                             max_concurrent=1)
+                search_state.search_alg = algo
             else:
                 search_space = None
                 if self._hpo_method in ('bs', 'cfo'):
-                    search_state.search_alg.searcher.set_search_properties(
+                    search_state.search_alg.set_search_properties(
                         config={
                             'metric_target': self._state.best_loss,
                         },
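
The two hunks above drop ray.tune's `ConcurrencyLimiter` wrapper: the searcher is stored on `search_state` directly, so properties are set on it rather than on a wrapper's `.searcher` attribute. A minimal sketch of the resulting wiring, assuming illustrative stand-ins for the surrounding AutoML state (`search_space`, `points_to_evaluate`, `best_loss` below are made up):

```python
from flaml import CFO, tune

search_space = {'learning_rate': tune.loguniform(1e-5, 1e-1)}  # illustrative
points_to_evaluate = [{'learning_rate': 1e-3}]
best_loss = 0.5  # assumed current best

algo = CFO(metric='val_loss', mode='min', space=search_space,
           points_to_evaluate=points_to_evaluate)
search_alg = algo  # previously: ConcurrencyLimiter(algo, max_concurrent=1)
# properties now update the searcher itself, not a wrapped .searcher:
search_alg.set_search_properties(config={'metric_target': best_loss})
```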

View File

@@ -67,6 +67,12 @@ class BaseEstimator:
         '''
         return self._model

+    @property
+    def estimator(self):
+        '''Trained model after fit() is called, or None before fit() is called
+        '''
+        return self._model
+
     def _preprocess(self, X):
         return X
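
The new `estimator` property is a straight alias of `model`: both return `self._model`, so the trained underlying learner can be read under either name once `fit()` has run. A hedged usage sketch (the quick iris run and the 10-second budget are illustrative only):

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X, y = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(X_train=X, y_train=y, task='classification', time_budget=10)

best = automl.model                  # FLAML's estimator wrapper for the best trial
print(best.estimator)                # new alias: the trained underlying model
print(best.estimator is best.model)  # True; both properties return self._model
```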

View File

@@ -17,7 +17,7 @@ except ImportError:
     from .suggestion import OptunaSearch as GlobalSearch
 from .variant_generator import generate_variants
 from .search_thread import SearchThread
-from .flow2 import FLOW2 as LocalSearch
+from .flow2 import FLOW2
 import logging
 logger = logging.getLogger(__name__)
@@ -30,6 +30,7 @@ class BlendSearch(Searcher):
     cost_attr = "time_total_s"  # cost attribute in result
     lagrange = '_lagrange'  # suffix for lagrange-modified metric
     penalty = 1e+10  # penalty term for constraints
+    LocalSearch = FLOW2

     def __init__(self,
                  metric: Optional[str] = None,
@@ -131,7 +132,7 @@
             self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
         else:
             self._gs = None
-        self._ls = LocalSearch(
+        self._ls = self.LocalSearch(
             init_config, metric, mode, cat_hp_cost, space,
             prune_attr, min_resource, max_resource, reduction_factor, seed)
         self._init_search()
@@ -277,7 +278,9 @@
             self._search_thread_pool[self._thread_count] = SearchThread(
                 self._ls.mode,
                 self._ls.create(
-                    config, objective, cost=result[self.cost_attr])
+                    config, objective,
+                    cost=result.get(self.cost_attr, 1)),
+                self.cost_attr
             )
             thread_id = self._thread_count
             self._thread_count += 1
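
Two things change here: the local search class becomes a `LocalSearch` class attribute (so subclasses can swap it, as `CFOCat` does below), and the cost lookup becomes `result.get(self.cost_attr, 1)`, which is what makes a missing or `None` cost attribute (the "allow cost attribute to be None" item in the commit message) safe. A minimal illustration of the fallback, with made-up values:

```python
cost_attr = 'time_total_s'
result = {'val_loss': 0.21}      # trial result that reports no cost field
cost = result.get(cost_attr, 1)  # 1, instead of KeyError from result[cost_attr]
```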

flaml/searcher/cfo_cat.py (new file, +31 lines)
View File

@@ -0,0 +1,31 @@
+'''!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the
+ * project root for license information.
+'''
+from .flow2 import FLOW2
+from .blendsearch import CFO
+
+
+class FLOW2Cat(FLOW2):
+    '''Local search algorithm optimized for categorical variables
+    '''
+
+    def _init_search(self):
+        super()._init_search()
+        self.step_ub = 1
+        self.step = self.STEPSIZE * self.step_ub
+        lb = self.step_lower_bound
+        if lb > self.step:
+            self.step = lb * 2
+        # upper bound
+        if self.step > self.step_ub:
+            self.step = self.step_ub
+        self._trunc = self.dim
+
+
+class CFOCat(CFO):
+    '''CFO optimized for categorical variables
+    '''
+
+    LocalSearch = FLOW2Cat
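
Because `BlendSearch` now builds its local search via the `LocalSearch` class attribute, `CFOCat` needs only the one-line override above. A hedged construction sketch; the search space is made up, and `_ls` is the internal local-search handle shown in the blendsearch diff:

```python
from flaml import tune
from flaml.searcher.cfo_cat import CFOCat

searcher = CFOCat(
    metric='loss', mode='min',
    space={
        'a': tune.choice(['x', 'y', 'z']),  # unordered categoricals
        'b': tune.choice([1, 2, 3]),
    },
)
# the local search is a FLOW2Cat: step_ub == 1 and _trunc == dim,
# i.e. full truncation, tuned for categorical-only spaces
print(type(searcher._ls).__name__)  # 'FLOW2Cat'
```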

View File

@@ -172,12 +172,12 @@ class FLOW2(Searcher):
         self._num_complete4incumbent = self._cost_complete4incumbent = 0
         self._num_allowed4incumbent = 2 * self.dim
         self._proposed_by = {}  # trial_id: int -> incumbent: Dict
-        self.step = self.STEPSIZE * np.sqrt(self.dim)
+        self.step_ub = np.sqrt(self.dim)
+        self.step = self.STEPSIZE * self.step_ub
         lb = self.step_lower_bound
         if lb > self.step:
             self.step = lb * 2
         # upper bound
-        self.step_ub = np.sqrt(self.dim)
         if self.step > self.step_ub:
             self.step = self.step_ub
         # maximal # consecutive no improvements
@@ -189,8 +189,11 @@
         self._reset_times = 0
         # record intermediate trial cost
         self._trial_cost = {}
-        self._same = False  # whether the proposedd config is the same as best_config
-        self._init_phrase = True  # initial phase to increase initial stepsize
+        self._same = False  # whether the proposed config is the same as best_config
+        self._init_phase = True  # initial phase to increase initial stepsize
+        self._trunc = 0
+        # no truncation by default. when > 0, it means how many
+        # non-zero dimensions to keep in the random unit vector

     @property
     def step_lower_bound(self) -> float:
@@ -215,7 +218,7 @@
         if np.isinf(step_lb):
             step_lb = self.STEP_LOWER_BOUND
         else:
-            step_lb *= np.sqrt(self.dim)
+            step_lb *= self.step_ub
         return step_lb

     @property
@@ -285,12 +288,14 @@
         return unflatten_dict(config)

     def create(self, init_config: Dict, obj: float, cost: float) -> Searcher:
-        flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost,
-                      unflatten_dict(self.space), self.prune_attr,
-                      self.min_resource, self.max_resource,
-                      self.resource_multiple_factor, self._seed + 1)
+        flow2 = self.__class__(
+            init_config, self.metric, self.mode, self._cat_hp_cost,
+            unflatten_dict(self.space), self.prune_attr,
+            self.min_resource, self.max_resource,
+            self.resource_multiple_factor, self._seed + 1)
         flow2.best_obj = obj * self.metric_op  # minimize internally
         flow2.cost_incumbent = cost
+        self._seed += 1
         return flow2

     def normalize(self, config) -> Dict:
@@ -315,10 +320,11 @@
             elif key in self.incumbent:
                 config_norm[key] = self.incumbent[
                     key] if value == self.best_config[
-                    key] else (self.incumbent[
-                        key] + 1) % self._unordered_cat_hp[key]
+                    key] else (
+                        self.incumbent[key]
+                        + 1.0 / self._unordered_cat_hp[key]) % 1
             else:
-                config_norm[key] = 0
+                config_norm[key] = 0.5
             continue
         # Uniform/LogUniform/Normal/Base
         sampler = domain.get_sampler()
@@ -365,7 +371,8 @@
                 config_denorm[key] = l[min(n - 1, int(np.floor(value * n)))]
             else:
                 assert key in self.incumbent
-                if round(value) == self.incumbent[key]:
+                n = self._unordered_cat_hp[key]
+                if np.floor(value * n) == np.floor(self.incumbent[key] * n):
                     config_denorm[key] = self.best_config[key]
                 else:  # ****random value each time!****
                     config_denorm[key] = self._random.choice(
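
Taken together, the two hunks above change how unordered categoricals live in the normalized [0, 1) space: the incumbent's value keeps its coordinate, any other value sits 1/n away (mod 1), a key absent from the incumbent maps to the midpoint 0.5, and denormalization compares buckets of width 1/n instead of using round(). A standalone numeric check of that arithmetic (values are illustrative):

```python
import numpy as np

n = 4            # number of categories for this hyperparameter
incumbent = 0.5  # normalized coordinate of the incumbent's value
# a different categorical value is shifted by 1/n, staying inside [0, 1):
other = (incumbent + 1.0 / n) % 1                      # 0.75
# denormalize compares buckets of width 1/n rather than round():
print(np.floor(other * n) == np.floor(incumbent * n))  # False -> resample
print((0.99 + 1.0 / n) % 1)                            # wraps around to 0.24
```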
@@ -448,7 +455,11 @@
                 if self.step > self.step_ub:
                     self.step = self.step_ub
             self._iter_best_config = self.trial_count_complete
+            if self._trunc:
+                self._trunc = min(self._trunc + 1, self.dim)
             return
+        elif self._trunc:
+            self._trunc = max(self._trunc >> 1, 1)
         proposed_by = self._proposed_by.get(trial_id)
         if proposed_by == self.incumbent:
             # proposed by current incumbent and no better
@@ -494,8 +505,10 @@
         # record the cost in case it is pruned and cost info is lost
         self._trial_cost[trial_id] = cost

-    def rand_vector_unit_sphere(self, dim) -> np.ndarray:
+    def rand_vector_unit_sphere(self, dim, trunc=0) -> np.ndarray:
         vec = self._random.normal(0, 1, dim)
+        if 0 < trunc < dim:
+            vec[np.abs(vec).argsort()[:dim - trunc]] = 0
         mag = np.linalg.norm(vec)
         return vec / mag
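
`rand_vector_unit_sphere` gains an optional truncation: the `dim - trunc` smallest-magnitude coordinates are zeroed before renormalizing, so the proposed direction moves in at most `trunc` dimensions. A standalone sketch of the same logic, with a fixed seed for reproducibility:

```python
import numpy as np

rng = np.random.RandomState(0)

def rand_vector_unit_sphere(dim, trunc=0):
    vec = rng.normal(0, 1, dim)
    if 0 < trunc < dim:
        # keep only the `trunc` largest-magnitude entries
        vec[np.abs(vec).argsort()[:dim - trunc]] = 0
    return vec / np.linalg.norm(vec)

v = rand_vector_unit_sphere(5, trunc=2)
print(v, np.count_nonzero(v))  # unit vector with at most 2 non-zero entries
```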
@@ -532,7 +545,7 @@
         else:
             # propose a new direction
             self._direction_tried = self.rand_vector_unit_sphere(
-                self.dim) * self.step
+                self.dim, self._trunc) * self.step
             for i, key in enumerate(self._tunable_keys):
                 move[key] += self._direction_tried[i]
         self._project(move)
@@ -540,13 +553,14 @@
         self._proposed_by[trial_id] = self.incumbent
         self._configs[trial_id] = (config, self.step)
         self._num_proposedby_incumbent += 1
-        if self._init_phrase:
+        best_config = flatten_dict(self.best_config)
+        if self._init_phase:
             if self._direction_tried is None:
                 if self._same:
-                    # check if the new config is different from self.best_config
+                    # check if the new config is different from best_config
                     same = True
                     for key, value in config.items():
-                        if key not in self.best_config or value != self.best_config[key]:
+                        if key not in best_config or value != best_config[key]:
                             same = False
                             break
                     if same:
@@ -555,10 +569,10 @@
                     if self.step > self.step_ub:
                         self.step = self.step_ub
             else:
-                # check if the new config is different from self.best_config
+                # check if the new config is different from best_config
                 same = True
                 for key, value in config.items():
-                    if key not in self.best_config or value != self.best_config[key]:
+                    if key not in best_config or value != best_config[key]:
                         same = False
                         break
                 self._same = same
@@ -566,7 +580,7 @@
                 not self._resource or self._resource == self.max_resource):
             # check stuck condition if using max resource
             self._num_proposedby_incumbent -= 2
-            self._init_phrase = False
+            self._init_phase = False
             if self.step >= self.step_lower_bound:
                 # decrease step size
                 self._oldK = self._K if self._K else self._iter_best_config
@@ -574,6 +588,27 @@
                 self.step *= np.sqrt(self._oldK / self._K)
             else:
                 return None
+        if self._init_phase:
+            return unflatten_dict(config)
+        if self._trunc == 1 and self._direction_tried is not None:
+            # random
+            for i, key in enumerate(self._tunable_keys):
+                if self._direction_tried[i] != 0:
+                    for _, generated in generate_variants({'config': {
+                            key: self.space[key]
+                    }}):
+                        if generated['config'][key] != best_config[key]:
+                            config[key] = generated['config'][key]
+                            return unflatten_dict(config)
+                    break
+        else:
+            # check if config == best_config
+            if len(config) == len(best_config):
+                for key, value in best_config.items():
+                    if value != config[key]:
+                        return unflatten_dict(config)
         # print('move to', move)
         self.incumbent = move
         return unflatten_dict(config)

     def _project(self, config):
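
The `trunc == 1` branch above resamples exactly one dimension by pushing a single-key space through `generate_variants`, which yields `(resolved_vars, spec)` pairs with fresh samples filled in; if the draw matches the incumbent it gives up for this round (`break`). A hedged standalone sketch, assuming the import path matches the sibling-module import in the blendsearch diff, with a retry loop added only for the demo:

```python
from flaml import tune
from flaml.searcher.variant_generator import generate_variants

space = {'x': tune.choice(['a', 'b', 'c'])}  # illustrative single-key space
best_config = {'x': 'a'}
config = dict(best_config)

# each generate_variants() call draws new samples for unresolved domains
while config['x'] == best_config['x']:
    for _, generated in generate_variants({'config': space}):
        config['x'] = generated['config']['x']
print(config)  # e.g. {'x': 'c'}
```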

View File

@@ -19,11 +19,11 @@ class SearchThread:
     '''Class of global or local search thread
     '''
-    cost_attr = 'time_total_s'
     _eps = 1.0

     def __init__(self, mode: str = "min",
-                 search_alg: Optional[Searcher] = None):
+                 search_alg: Optional[Searcher] = None,
+                 cost_attr: Optional[str] = 'time_total_s'):
         ''' When search_alg is omitted, use local search FLOW2
         '''
         self._search_alg = search_alg
@@ -40,6 +40,7 @@
         self.priority = self.speed = 0
         self._init_config = True
         self.running = 0  # the number of running trials from the thread
+        self.cost_attr = cost_attr

     @classmethod
     def set_eps(cls, time_budget_s):
@@ -108,9 +109,8 @@
             # under this thread
             self._init_config = False
         if result:
-            if self.cost_attr in result:
-                self.cost_last = result[self.cost_attr]
-                self.cost_total += self.cost_last
+            self.cost_last = result.get(self.cost_attr, 1)
+            self.cost_total += self.cost_last
             if self._search_alg.metric in result:
                 obj = result[self._search_alg.metric] * self._metric_op
                 if obj < self.obj_best1:

File diff suppressed because one or more lines are too long

View File

@@ -56,7 +56,6 @@ setuptools.setup(
             "torch==1.8.1",
             "datasets==1.4.1",
             "azure-storage-blob",
-            "tensorflow"
         ],
         "blendsearch": [
             "optuna==2.3.0"
@@ -78,6 +77,7 @@ setuptools.setup(
             "ray[tune]>=1.2.0",
             "transformers",
             "datasets==1.4.1",
+            "tensorboardX<=2.2",
             "torch"
         ]
     },