v0.1.3 Set default logging level to INFO (#14)

* set default logging level to INFO

* remove unnecessary imports

* simplify the add_learner API for future compatibility

* add a test for a customized learner

* add rgf-python as a test dependency

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Chi Wang 2020-12-15 08:10:43 -08:00 committed by GitHub
parent bea2ba8135
commit cb5ce4e3a6
10 changed files with 219 additions and 136 deletions


@@ -37,8 +37,7 @@ jobs:
       - name: Install packages and dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install flake8 pytest coverage
-          pip install -e .
+          pip install -e .[test]
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names


@@ -1,12 +1,10 @@
 from flaml.automl import AutoML
+import logging
-from flaml.model import BaseEstimator
-from flaml.data import get_output_from_log
 from flaml.version import __version__
-import logging
 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 # Add the console handler.
 _ch = logging.StreamHandler()
@@ -14,4 +12,4 @@ logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
\ No newline at end of file
+logger.addHandler(_ch)

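With this change, importing flaml configures the package logger at INFO with a console handler. A minimal sketch of how a downstream script could quiet or redirect that output, using only the standard logging API (the file handler below is illustrative, not part of this commit):

import logging
import flaml  # importing flaml installs the INFO-level console handler shown above

# Raise the threshold if the INFO-level search output is too chatty.
logging.getLogger('flaml').setLevel(logging.WARNING)

# Or keep INFO but also write it to a file.
logging.getLogger('flaml').addHandler(logging.FileHandler('flaml.log'))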

@@ -390,22 +390,22 @@ class AutoML:
     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner

         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm (when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm (when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate

     def get_estimator_from_log(self, log_file_name, record_id, objective):

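The two removed parameters are now fixed internally, so callers only name the learner and hand over the class. A minimal usage sketch of the simplified API, mirroring the test added later in this commit (MyRegularizedGreedyForest is the BaseEstimator subclass defined in that test):

from flaml import AutoML

automl = AutoML()
# size_estimate and cost_relative2lgbm are no longer accepted here;
# add_learner fills in the defaults shown in the diff above.
automl.add_learner(learner_name='RGF',
                   learner_class=MyRegularizedGreedyForest)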

@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader


@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"

File diff suppressed because one or more lines are too long


@@ -45,6 +45,7 @@ setuptools.setup(
             "flake8>=3.8.4",
             "pytest>=6.1.1",
             "coverage>=5.3",
+            "rgf-python",
         ],
     },
     classifiers=[


@@ -2,9 +2,55 @@ import unittest
 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
+from sklearn.datasets import load_boston, load_iris, load_wine
-from flaml import AutoML, get_output_from_log
+from flaml import AutoML
+from flaml.data import get_output_from_log
+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name='max_leaf',
+                                     type=int, lower=4, init=4, upper=10000),
+        'n_iter': ConfigSearchInfo(name='n_iter', type=int, lower=1,
+                                   init=1, upper=32768),
+        'n_tree_search': ConfigSearchInfo(name='n_tree_search', type=int,
+                                          lower=1, init=1, upper=32768),
+        'opt_interval': ConfigSearchInfo(name='opt_interval', type=int,
+                                         lower=1, init=100, upper=10000),
+        'learning_rate': ConfigSearchInfo(name='learning_rate', type=float,
+                                          lower=0.01, init=1.0, upper=20.0),
+        'min_samples_leaf': ConfigSearchInfo(name='min_samples_leaf',
+                                             type=int, lower=1, init=20, upper=20)
+    }
+
+    def __init__(self, objective_name='binary:logistic', n_jobs=1,
+                 max_leaf=1000, n_iter=1, n_tree_search=1, opt_interval=1,
+                 learning_rate=1.0, min_samples_leaf=1):
+        self.objective_name = objective_name
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            "n_jobs": n_jobs,
+        }
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
 class TestAutoML(unittest.TestCase):

+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name='RGF',
+                           learner_class=MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+        '''The main flaml automl API'''
+        automl.fit(X_train=X_train, y_train=y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)

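The test also imports get_output_from_log, which can replay the search trajectory recorded in test/wine.log. A hedged sketch of how that helper is typically called (keyword names follow flaml's documented usage; treat the exact return-tuple layout as an assumption for this version):

from flaml.data import get_output_from_log

# Each list is aligned by record: wall-clock time, best error so far,
# error of the evaluated config, the config itself, and the logged metric.
time_list, best_error_list, error_list, config_list, metric_list = \
    get_output_from_log(filename='test/wine.log', time_budget=10)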

@@ -36,9 +36,9 @@ class TestLogging(unittest.TestCase):
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
-        n = len(y_train)
-        automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
-                              X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
+        n = len(y_train) >> 1
+        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
+                              X_val=X_train[n:], y_val=y_train[n:],
                               **automl_settings)
         # Check if the log buffer is populated.

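The refactor computes the halfway index once instead of shifting inside every slice; behavior is unchanged. A self-contained illustration of the resulting split (plain numpy, not from the commit):

import numpy as np

y = np.arange(10)
n = len(y) >> 1        # integer halving via bit shift: 10 >> 1 == 5
print(y[:n], y[n:])    # first half for training, second half for validation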

@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory
 from sklearn.datasets import load_boston