mirror of https://github.com/microsoft/autogen.git
v0.1.3 Set default logging level to INFO (#14)
* set default logging level to INFO
* remove unnecessary import
* API future compatibility
* add test for customized learner
* test dependency

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
This commit is contained in:
parent
bea2ba8135
commit
cb5ce4e3a6
@@ -37,8 +37,7 @@ jobs:
      - name: Install packages and dependencies
        run: |
          python -m pip install --upgrade pip
-         pip install flake8 pytest coverage
-         pip install -e .
+         pip install -e .[test]
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
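Note: the two explicit installs are collapsed into one. pip install -e .[test] installs the package in editable mode together with the optional test extras declared in setup.py (flake8, pytest, coverage, and now rgf-python; see the setup.py hunk below), so the CI dependency list lives in one place.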
@@ -1,12 +1,10 @@
 from flaml.automl import AutoML
+import logging
 
 from flaml.model import BaseEstimator
 from flaml.data import get_output_from_log
 from flaml.version import __version__
-import logging
 
 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 # Add the console handler.
 _ch = logging.StreamHandler()
@@ -14,4 +12,4 @@ logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
+logger.addHandler(_ch)
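Note: because the package logger is configured at import time with an INFO threshold and a console handler, downstream applications that want quieter output can raise the level themselves. A minimal sketch (the logger name 'flaml' follows from logging.getLogger(__name__) in the package __init__):

    import logging

    # The package logger defaults to INFO after this commit; raise it to
    # WARNING to silence informational output from the AutoML search.
    logging.getLogger('flaml').setLevel(logging.WARNING)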
@@ -390,22 +390,22 @@ class AutoML:
 
     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner
 
         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm(when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm(when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate
 
     def get_estimator_from_log(self, log_file_name, record_id, objective):
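Note: the public signature now takes only a learner name and a BaseEstimator subclass; cost_relative2lgbm is pinned to 1 and size_estimate to a constant 1.0 per config, which keeps the signature stable if those internals change ("API future compatibility" in the commit message). A minimal usage sketch (MyEstimator stands in for any BaseEstimator subclass that defines params_configsearch_info, like the RGF wrapper in the test below):

    from flaml import AutoML

    automl = AutoML()
    # Only two arguments remain in the public API.
    automl.add_learner(learner_name = 'my_learner', learner_class = MyEstimator)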
@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader
@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"
File diff suppressed because one or more lines are too long
setup.py
@@ -45,6 +45,7 @@ setuptools.setup(
            "flake8>=3.8.4",
            "pytest>=6.1.1",
            "coverage>=5.3",
+           "rgf-python",
        ],
    },
    classifiers=[
@@ -2,9 +2,55 @@ import unittest
 
 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
+from sklearn.datasets import load_boston, load_iris, load_wine
 
-from flaml import AutoML, get_output_from_log
+from flaml import AutoML
+from flaml.data import get_output_from_log
 
+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name = 'max_leaf',
+            type = int, lower = 4, init = 4, upper = 10000),
+        'n_iter': ConfigSearchInfo(name = 'n_iter', type = int, lower = 1,
+            init = 1, upper = 32768),
+        'n_tree_search': ConfigSearchInfo(name = 'n_tree_search', type = int,
+            lower = 1, init = 1, upper = 32768),
+        'opt_interval': ConfigSearchInfo(name = 'opt_interval', type = int,
+            lower = 1, init = 100, upper = 10000),
+        'learning_rate': ConfigSearchInfo(name = 'learning_rate', type = float,
+            lower = 0.01, init = 1.0, upper = 20.0),
+        'min_samples_leaf': ConfigSearchInfo(name = 'min_samples_leaf',
+            type = int, lower = 1, init = 20, upper = 20)
+    }
+
+    def __init__(self, objective_name = 'binary:logistic', n_jobs = 1,
+        max_leaf = 1000, n_iter = 1, n_tree_search = 1, opt_interval = 1,
+        learning_rate = 1.0, min_samples_leaf = 1):
+
+        self.objective_name = objective_name
+
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            "n_jobs": n_jobs,
+        }
+
 
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
 
 class TestAutoML(unittest.TestCase):
 
+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name = 'RGF',
+            learner_class = MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+
+        '''The main flaml automl API'''
+        automl.fit(X_train = X_train, y_train = y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)
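Note: the new test registers the RGF wrapper under the name 'RGF', lets it compete with lgbm, rf, and xgboost on load_wine for a 10-second budget, and logs to test/wine.log. Assuming the module lives at test/test_automl.py (the file path is not shown in this mirror), the test can be run in isolation with: pytest test/test_automl.py -k test_custom_learner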
@@ -36,9 +36,9 @@ class TestLogging(unittest.TestCase):
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
-        n = len(y_train)
-        automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
-                              X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
+        n = len(y_train) >> 1
+        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
+                              X_val=X_train[n:], y_val=y_train[n:],
                               **automl_settings)
 
         # Check if the log buffer is populated.
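Note: the refactor computes the half-count once instead of shifting inline at every use; n >> 1 is integer division by two, so the Boston housing data (506 rows, giving n == 253) is split into equal train and validation halves.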
@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory
 
 from sklearn.datasets import load_boston