install editable package in codespace (#826)

* install editable package in codespace

* fix test error in test_forecast

* fix test error in test_space

* openml version

* break tests; pre-commit

* skip on py10+win32

* install mlflow in test

* install mlflow in [test]

* skip test in windows

* import

* handle PermissionError

* skip test in windows

* skip test in windows

* skip test in windows

* skip test in windows

* remove ts_forecast_panel from doc
This commit is contained in:
Chi Wang 2022-11-27 11:22:54 -08:00 committed by GitHub
parent 586afe0d6b
commit 595af7a04f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 129 additions and 59 deletions

View File

@ -17,10 +17,7 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/*
ENV DEBIAN_FRONTEND=dialog
#
# Install extras for development
#
RUN pip3 --disable-pip-version-check --no-cache-dir install flaml[test,notebook]
RUN pip3 --disable-pip-version-check --no-cache-dir install flaml
# For docs
RUN npm install --global yarn
RUN pip install pydoc-markdown==4.5.0

View File

@ -8,5 +8,6 @@
}
},
"terminal.integrated.defaultProfile.linux": "bash"
}
},
"updateContentCommand": "pip install -e .[test,notebook] && pre-commit install"
}

View File

@ -2213,7 +2213,7 @@ class AutoML(BaseEstimator):
```
task: A string of the task type, e.g.,
'classification', 'regression', 'ts_forecast_regression',
'ts_forecast_classification', 'ts_forecast_panel', 'rank', 'seq-classification',
'ts_forecast_classification', 'rank', 'seq-classification',
'seq-regression', 'summarization'.
n_jobs: An integer of the number of threads for training | default=-1.
Use all available resources when n_jobs == -1.

View File

@ -2266,18 +2266,13 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
return training, train_dataloader, val_dataloader
def fit(self, X_train, y_train, budget=None, **kwargs):
import copy
from pathlib import Path
import warnings
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
import tensorboard as tb
warnings.filterwarnings("ignore")
current_time = time.time()

View File

@ -18,7 +18,6 @@
from typing import Dict, Optional
import numpy as np
from .trial import Trial
from collections import defaultdict
import logging
logger = logging.getLogger(__name__)

View File

@ -428,7 +428,12 @@ class Categorical(Domain):
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
items = random_state.choice(domain.categories, size=size).tolist()
# do not use .choice() directly on domain.categories
# as that will coerce them to a single dtype
indices = random_state.choice(
np.arange(0, len(domain.categories)), size=size
)
items = [domain.categories[index] for index in indices]
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _Uniform
@ -479,8 +484,18 @@ class Quantized(Sampler):
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
values = self.sampler.sample(domain, spec, size, random_state=random_state)
if self.q == 1:
return self.sampler.sample(domain, spec, size, random_state=random_state)
quantized_domain = copy(domain)
quantized_domain.lower = np.ceil(domain.lower / self.q) * self.q
quantized_domain.upper = np.floor(domain.upper / self.q) * self.q
values = self.sampler.sample(
quantized_domain, spec, size, random_state=random_state
)
quantized = np.round(np.divide(values, self.q)) * self.q
if not isinstance(quantized, np.ndarray):
return domain.cast(quantized)
return list(quantized)
@ -586,7 +601,9 @@ def lograndint(lower: int, upper: int, base: float = 10):
def qrandint(lower: int, upper: int, q: int = 1):
"""Sample an integer value uniformly between ``lower`` and ``upper``.
``lower`` is inclusive, ``upper`` is also inclusive (!).
The value will be quantized, i.e. rounded to an integer increment of ``q``.
Quantization makes the upper bound inclusive.
"""
@ -614,12 +631,15 @@ def randn(mean: float = 0.0, sd: float = 1.0):
def qrandn(mean: float, sd: float, q: float):
"""Sample a float value normally with ``mean`` and ``sd``.
The value will be quantized, i.e. rounded to an integer increment of ``q``.
Args:
mean (float): Mean of the normal distribution.
sd (float): SD of the normal distribution.
q (float): Quantization number. The result will be rounded to an
mean: Mean of the normal distribution.
sd: SD of the normal distribution.
q: Quantization number. The result will be rounded to an
integer increment of this value.
"""
return Float(None, None).normal(mean, sd).quantized(q)

View File

@ -38,10 +38,10 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install flaml[notebook]\n",
"# from v0.6.6, catboost is made an optional dependency to build conda package.\n",
"# to install catboost without installing the notebook option, you can run:\n",
"# %pip install flaml[catboost]"
"%pip install flaml[notebook] openml==0.10.2\n",
"# From v0.6.6, catboost is made an optional dependency to build conda package.\n",
"# To install catboost, you can run:\n",
"%pip install flaml[catboost]"
]
},
{
@ -836,6 +836,15 @@
"In this example, the above information for RGF is wrapped in a python class called *MyRegularizedGreedyForest* that exposes the hyperparameters."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install rgf-python"
]
},
{
"cell_type": "code",
"execution_count": 19,
@ -1259,11 +1268,8 @@
}
],
"metadata": {
"interpreter": {
"hash": "5432eb6463ddd46aaa76ccf859b1fa421ab98224a755661a6688060ed6e23d59"
},
"kernelspec": {
"display_name": "ds440flaml",
"display_name": "Python 3.9.15 64-bit",
"language": "python",
"name": "python3"
},
@ -1277,7 +1283,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.9.15"
},
"vscode": {
"interpreter": {
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
}
}
},
"nbformat": 4,

View File

@ -40,11 +40,8 @@ setuptools.setup(
install_requires=install_requires,
extras_require={
"notebook": [
"openml==0.10.2",
"jupyter",
"matplotlib",
"rgf-python",
"catboost>=0.26",
],
"test": [
"flake8>=3.8.4",
@ -57,7 +54,7 @@ setuptools.setup(
"catboost>=0.26",
"rgf-python",
"optuna==2.8.0",
"openml",
"openml==0.10.2",
"statsmodels>=0.12.2",
"psutil==5.8.0",
"dataclasses",
@ -67,7 +64,8 @@ setuptools.setup(
"rouge_score",
"hcrystalball==0.1.10",
"seqeval",
"pytorch-forecasting>=0.9.0",
"pytorch-forecasting>=0.9.0,<=0.10.1",
"mlflow",
],
"catboost": ["catboost>=0.26"],
"blendsearch": ["optuna==2.8.0"],

View File

@ -108,10 +108,7 @@ def _test_nobudget():
def test_mlflow():
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
# subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
import mlflow
from flaml.data import load_openml_task
@ -152,9 +149,12 @@ def test_mlflow():
print(automl.predict_proba(X_test))
except ImportError:
pass
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
def test_mlflow_iris():
from sklearn.datasets import load_iris
import mlflow
from flaml import AutoML
with mlflow.start_run():
automl = AutoML()
@ -167,6 +167,8 @@ def test_mlflow():
X_train, y_train = load_iris(return_X_y=True)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
if __name__ == "__main__":
test_automl(600)

View File

@ -74,7 +74,10 @@ def test_hf_data():
del automl
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -113,7 +113,10 @@ def _test_switch_classificationhead(each_data, each_model_path):
return
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -84,7 +84,10 @@ def test_custom_metric():
del automl
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -5,7 +5,9 @@ import os
import shutil
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
)
def test_cv():
from flaml import AutoML
import requests
@ -22,7 +24,10 @@ def test_cv():
return
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -5,7 +5,9 @@ import os
import shutil
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
)
def test_mcc():
from flaml import AutoML
import requests
@ -49,7 +51,10 @@ def test_mcc():
print("Accuracy: " + str(accuracy))
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -35,7 +35,10 @@ def test_regression():
automl.predict(X_val)
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -7,8 +7,8 @@ import shutil
@pytest.mark.skipif(
sys.platform == "darwin" or sys.version < "3.7",
reason="do not run on mac os or py3.6",
sys.platform in ["darwin", "win32"] or sys.version < "3.7",
reason="do not run on mac os, windows or py3.6",
)
def test_summarization():
# TODO: manual test for how effective postprocess_seq2seq_prediction_label is
@ -51,7 +51,10 @@ def test_summarization():
automl.predict(X_test)
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -11,8 +11,8 @@ from utils import (
@pytest.mark.skipif(
sys.platform == "darwin" or sys.version < "3.7",
reason="do not run on mac os or py<3.7",
sys.platform in ["darwin", "win32"] or sys.version < "3.7",
reason="do not run on mac os, windows or py<3.7",
)
def test_tokenclassification_idlabel():
from flaml import AutoML
@ -65,12 +65,15 @@ def test_tokenclassification_idlabel():
assert val_loss == min_inter_result
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
@pytest.mark.skipif(
sys.platform == "darwin" or sys.version < "3.7",
reason="do not run on mac os or py<3.7",
sys.platform in ["darwin", "win32"] or sys.version < "3.7",
reason="do not run on mac os, windows or py<3.7",
)
def test_tokenclassification_tokenlabel():
from flaml import AutoML
@ -112,7 +115,10 @@ def test_tokenclassification_tokenlabel():
assert val_loss == min_inter_result
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
if __name__ == "__main__":

View File

@ -3,6 +3,7 @@ import sys
from flaml.default import portfolio
import os
import shutil
import pytest
def pop_args(fit_kwargs):
@ -18,6 +19,7 @@ def test_build_portfolio(path="./test/nlp/default", strategy="greedy"):
portfolio.main()
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_starting_point_not_in_search_space():
from flaml import AutoML
@ -84,9 +86,13 @@ def test_starting_point_not_in_search_space():
)
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_points_to_evaluate():
from flaml import AutoML
@ -106,10 +112,14 @@ def test_points_to_evaluate():
automl.fit(X_train, y_train, **automl_settings)
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
# TODO: implement _test_zero_shot_model
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_zero_shot_nomodel():
from flaml.default import preprocess_and_suggest_hyperparams
@ -141,7 +151,10 @@ def test_zero_shot_nomodel():
model.fit(X_train, y_train, **fit_kwargs)
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
@ -176,4 +189,7 @@ def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
import shutil
if os.path.exists("test/data/output/"):
shutil.rmtree("test/data/output/")
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")

View File

@ -12,7 +12,7 @@
- 'regression': regression with tabular data.
- 'ts_forecast': time series forecasting.
- 'ts_forecast_classification': time series forecasting for classification.
- 'ts_forecast_panel': time series forecasting for panel datasets (multiple time series).
<!-- - 'ts_forecast_panel': time series forecasting for panel datasets (multiple time series). -->
- 'rank': learning to rank.
- 'seq-classification': sequence classification.
- 'seq-regression': sequence regression.
@ -120,7 +120,7 @@ The estimator list can contain one or more estimator names, each corresponding t
- 'arima': ARIMA for task "ts_forecast". Hyperparameters: p, d, q.
- 'sarimax': SARIMAX for task "ts_forecast". Hyperparameters: p, d, q, P, D, Q, s.
- 'transformer': Hugging Face transformer models for tasks "seq-classification", "seq-regression", "multichoice-classification", "token-classification" and "summarization". Hyperparameters: learning_rate, num_train_epochs, per_device_train_batch_size, warmup_ratio, weight_decay, adam_epsilon, seed.
- 'temporal_fusion_transform': TemporalFusionTransformerEstimator for task "ts_forecast_panel". Hyperparameters: gradient_clip_val, hidden_size, hidden_continuous_size, attention_head_size, dropout, learning_rate.
<!-- - 'temporal_fusion_transform': TemporalFusionTransformerEstimator for task "ts_forecast_panel". Hyperparameters: gradient_clip_val, hidden_size, hidden_continuous_size, attention_head_size, dropout, learning_rate. -->
* Custom estimator. Use custom estimator for:
- tuning an estimator that is not built-in;
- customizing search space for a built-in estimator.