v0.1.3 Set default logging level to INFO (#14)

* set default logging level to INFO

* remove unnecessary imports

* simplify the add_learner API for future compatibility

* add a test for a customized learner

* add rgf-python as a test dependency

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Chi Wang 2020-12-15 08:10:43 -08:00 committed by GitHub
parent bea2ba8135
commit cb5ce4e3a6
10 changed files with 219 additions and 136 deletions


@@ -37,8 +37,7 @@ jobs:
       - name: Install packages and dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install flake8 pytest coverage
-          pip install -e .
+          pip install -e .[test]
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names


@@ -1,12 +1,10 @@
 from flaml.automl import AutoML
+import logging
-from flaml.model import BaseEstimator
-from flaml.data import get_output_from_log
 from flaml.version import __version__
-import logging
 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 # Add the console handler.
 _ch = logging.StreamHandler()
@@ -14,4 +12,4 @@ logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
\ No newline at end of file
+logger.addHandler(_ch)

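With this change, importing flaml configures the package logger at INFO with a console handler. A minimal sketch of how a downstream script could quiet or redirect that output, using only the standard logging API (the file handler below is illustrative, not part of this commit):

import logging
import flaml  # importing flaml installs the INFO-level console handler shown above

# Raise the threshold if the INFO-level search output is too chatty.
logging.getLogger('flaml').setLevel(logging.WARNING)

# Or keep INFO but also write it to a file.
logging.getLogger('flaml').addHandler(logging.FileHandler('flaml.log'))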

@@ -390,22 +390,22 @@ class AutoML:
     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner

         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm (when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm (when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate

     def get_estimator_from_log(self, log_file_name, record_id, objective):

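The two removed parameters are now fixed internally, so callers only name the learner and hand over the class. A minimal usage sketch of the simplified API, mirroring the test added later in this commit (MyRegularizedGreedyForest is the BaseEstimator subclass defined in that test):

from flaml import AutoML

automl = AutoML()
# size_estimate and cost_relative2lgbm are no longer accepted here;
# add_learner fills in the defaults shown in the diff above.
automl.add_learner(learner_name='RGF',
                   learner_class=MyRegularizedGreedyForest)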

@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader


@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"

File diff suppressed because one or more lines are too long


@@ -45,6 +45,7 @@ setuptools.setup(
             "flake8>=3.8.4",
             "pytest>=6.1.1",
             "coverage>=5.3",
+            "rgf-python",
         ],
     },
     classifiers=[


@@ -2,9 +2,55 @@ import unittest
 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
+from sklearn.datasets import load_boston, load_iris, load_wine
-from flaml import AutoML, get_output_from_log
+from flaml import AutoML
+from flaml.data import get_output_from_log
+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name='max_leaf',
+                                     type=int, lower=4, init=4, upper=10000),
+        'n_iter': ConfigSearchInfo(name='n_iter', type=int, lower=1,
+                                   init=1, upper=32768),
+        'n_tree_search': ConfigSearchInfo(name='n_tree_search', type=int,
+                                          lower=1, init=1, upper=32768),
+        'opt_interval': ConfigSearchInfo(name='opt_interval', type=int,
+                                         lower=1, init=100, upper=10000),
+        'learning_rate': ConfigSearchInfo(name='learning_rate', type=float,
+                                          lower=0.01, init=1.0, upper=20.0),
+        'min_samples_leaf': ConfigSearchInfo(name='min_samples_leaf',
+                                             type=int, lower=1, init=20, upper=20)
+    }
+
+    def __init__(self, objective_name='binary:logistic', n_jobs=1,
+                 max_leaf=1000, n_iter=1, n_tree_search=1, opt_interval=1,
+                 learning_rate=1.0, min_samples_leaf=1):
+        self.objective_name = objective_name
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            "n_jobs": n_jobs,
+        }
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
 class TestAutoML(unittest.TestCase):

+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name='RGF',
+                           learner_class=MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+        '''The main flaml automl API'''
+        automl.fit(X_train=X_train, y_train=y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)

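The test also imports get_output_from_log, which can replay the search trajectory recorded in test/wine.log. A hedged sketch of how that helper is typically called (keyword names follow flaml's documented usage; treat the exact return-tuple layout as an assumption for this version):

from flaml.data import get_output_from_log

# Each list is aligned by record: wall-clock time, best error so far,
# error of the evaluated config, the config itself, and the logged metric.
time_list, best_error_list, error_list, config_list, metric_list = \
    get_output_from_log(filename='test/wine.log', time_budget=10)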

@@ -36,9 +36,9 @@ class TestLogging(unittest.TestCase):
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
-        n = len(y_train)
-        automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
-                              X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
+        n = len(y_train) >> 1
+        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
+                              X_val=X_train[n:], y_val=y_train[n:],
                               **automl_settings)
         # Check if the log buffer is populated.

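The refactor computes the halfway index once instead of shifting inside every slice; behavior is unchanged. A self-contained illustration of the resulting split (plain numpy, not from the commit):

import numpy as np

y = np.arange(10)
n = len(y) >> 1        # integer halving via bit shift: 10 >> 1 == 5
print(y[:n], y[n:])    # first half for training, second half for validation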

@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory
 from sklearn.datasets import load_boston