sample_weight; dependency; notebook

Chi Wang (MSR) 2021-02-13 10:43:11 -08:00
parent d18d292081
commit bd16eeee69
9 changed files with 676 additions and 229 deletions

View File

@@ -402,7 +402,7 @@ class AutoML:
self._X_train_all, self._y_train_all = \
self._transformer.fit_transform(X, y, self._state.task)
self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or
issparse(X_val) or
@@ -446,7 +446,8 @@ class AutoML:
self._X_train_all, self._y_train_all
if issparse(X_train_all):
X_train_all = X_train_all.tocsr()
if self._state.task != 'regression':
if self._state.task != 'regression' and self._state.fit_kwargs.get(
'sample_weight') is None:
# logger.info(f"label {pd.unique(y_train_all)}")
label_set, counts = np.unique(y_train_all, return_counts=True)
# augment rare classes
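The added condition in this hunk skips rare-class augmentation whenever a `sample_weight` is supplied: duplicating rows of rare classes would grow `y_train_all` without growing the user's weight vector, leaving the two out of alignment. A minimal sketch of the kind of augmentation being guarded, assuming a purely illustrative `min_count` threshold and helper name (not FLAML's internals):

```python
# Illustrative sketch only: duplicate rows of classes with fewer than
# `min_count` examples so every class reaches that count. The threshold and
# function name are assumptions for exposition, not FLAML's actual code.
import numpy as np

def augment_rare_classes(X, y, min_count=20):
    label_set, counts = np.unique(y, return_counts=True)
    for label in label_set[counts < min_count]:
        idx = np.where(y == label)[0]
        extra = np.resize(idx, min_count - len(idx))  # repeat rare rows
        X = np.concatenate([X, X[extra]])
        y = np.concatenate([y, y[extra]])
    return X, y  # len(y) changed, so any external sample_weight no longer matches
```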
@@ -1151,7 +1152,11 @@ class AutoML:
stacker = Stacker(estimators, best_m,
n_jobs=self._state.n_jobs,
passthrough=True)
stacker.fit(self._X_train_all, self._y_train_all)
if self._sample_weight_full is not None:
self._state.fit_kwargs[
'sample_weight'] = self._sample_weight_full
stacker.fit(self._X_train_all, self._y_train_all,
**self._state.fit_kwargs)
logger.info(f'ensemble: {stacker}')
self._trained_estimator = stacker
self._trained_estimator.model = stacker
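With `self._sample_weight_full` stashed during preprocessing, the ensemble path can now restore it into `fit_kwargs` so the final `Stacker` is fit with the same per-row weights as the individual learners. A hedged usage sketch (dataset, budget, and exact keyword names are assumptions based on this diff, not a verified reference for this FLAML version):

```python
# Hedged sketch: per-row weights flow through AutoML.fit via fit_kwargs and,
# after this change, also into the stacking ensemble's fit call.
import numpy as np
from sklearn.datasets import load_breast_cancer
from flaml import AutoML

X, y = load_breast_cancer(return_X_y=True)
weights = np.random.default_rng(0).uniform(0.5, 1.5, size=len(y))  # illustrative weights

automl = AutoML()
automl.fit(
    X_train=X, y_train=y,
    task='classification',
    time_budget=60,
    ensemble=True,            # build the Stacker shown above
    sample_weight=weights,    # forwarded through fit_kwargs to every estimator
)
```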

View File

@@ -146,6 +146,7 @@ based on optimism in face of uncertainty.
Example:
```python
# requirements: pip install flaml[blendsearch]
from flaml import BlendSearch
tune.run(...
search_alg = BlendSearch(points_to_evaluate=[init_config]),
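The new requirements comment points readers at the `blendsearch` extra, which now pulls in `optuna` (moved out of the core install in the setup.py change below). A slightly fuller, hedged sketch of the same pattern, assuming the snippet targets Ray Tune's `tune.run` as the `search_alg` keyword suggests; the objective and search space are illustrative only:

```python
# Hedged sketch: BlendSearch plugged into Ray Tune as the search algorithm.
# requirements: pip install flaml[blendsearch] ray[tune]
from ray import tune
from flaml import BlendSearch

def evaluate_config(config):
    # toy objective; a real trainable would train and evaluate a model here
    score = (config['x'] - 2) ** 2 + (config['y'] - 1) ** 2
    tune.report(score=score)

init_config = {'x': 0.0, 'y': 0.0}
analysis = tune.run(
    evaluate_config,
    config={'x': tune.uniform(-5, 5), 'y': tune.uniform(-5, 5)},
    metric='score', mode='min',
    num_samples=64,
    search_alg=BlendSearch(points_to_evaluate=[init_config]),
)
print(analysis.best_config)
```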

View File

@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.2.4"

View File

@@ -6,11 +6,16 @@
"source": [
"This notebook uses the Huggingface transformers library to finetune a transformer model.\n",
"\n",
"**Requirements.** This notebook has additional requirements:\n",
"\n",
"```bash\n",
"pip install -r transformers_requirements.txt\n",
"```"
"**Requirements.** This notebook has additional requirements:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install torch transformers datasets ipywidgets"
]
},
{
@@ -699,7 +704,7 @@
"source": [
"### Step 3. Launch with `flaml.tune.run`\n",
"\n",
"We are now ready to laungh the tuning using `flaml.tune.run`:"
"We are now ready to launch the tuning using `flaml.tune.run`:"
],
"cell_type": "markdown",
"metadata": {}
@@ -766,9 +771,13 @@
],
"metadata": {
"kernelspec": {
"display_name": "flaml",
"language": "python",
"name": "flaml"
"name": "python3",
"display_name": "Python 3.7.7 64-bit ('flaml': conda)",
"metadata": {
"interpreter": {
"hash": "bfcd9a6a9254a5e160761a1fd7a9e444f011592c6770d9f4180dde058a9df5dd"
}
}
},
"language_info": {
"codemirror_mode": {
@@ -780,7 +789,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.7-final"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +0,0 @@
torch
transformers
datasets
ipywidgets

View File

@@ -20,7 +20,6 @@ install_requires = [
"scipy>=1.4.1",
"catboost>=0.23",
"scikit-learn>=0.23.2",
"optuna==2.3.0"
],
@@ -48,6 +47,10 @@ setuptools.setup(
"coverage>=5.3",
"xgboost<1.3",
"rgf-python",
"optuna==2.3.0",
],
"blendsearch": [
"optuna==2.3.0"
],
"ray": [
"ray[tune]==1.1.0",

View File

@@ -1,3 +1,5 @@
'''Require: pip install torchvision ray
'''
import unittest
import os
import time

View File

@@ -1,5 +1,6 @@
'''Require: pip install flaml[test,ray]
'''
import unittest
import os
import time
from sklearn.model_selection import train_test_split
import sklearn.metrics
@@ -138,6 +139,7 @@ def _test_xgboost(method='BlendSearch'):
scheduler=scheduler, search_alg=algo)
ray.shutdown()
# # Load the best model checkpoint
# import os
# best_bst = xgb.Booster()
# best_bst.load_model(os.path.join(analysis.best_checkpoint,
# "model.xgb"))