diff --git a/flaml/automl.py b/flaml/automl.py
index 15a677657..080c9202a 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -321,61 +321,62 @@ class AutoMLState:
             if self.time_budget is None
             else self.time_budget - self.time_from_start
         )
-        # if self.resources_per_trial.get("gpu", 0) > 0:
+        if (
+            hasattr(self, "resources_per_trial")
+            and self.resources_per_trial.get("gpu", 0) > 0
+        ):

-        #     def _trainable_function_wrapper(config: dict):
+            def _trainable_function_wrapper(config: dict):

-        #         return_estimator, train_time = train_estimator(
-        #             X_train=sampled_X_train,
-        #             y_train=sampled_y_train,
-        #             config_dic=config,
-        #             task=self.task,
-        #             estimator_name=estimator,
-        #             n_jobs=self.n_jobs,
-        #             estimator_class=self.learner_classes.get(estimator),
-        #             budget=budget,
-        #             fit_kwargs=self.fit_kwargs,
-        #         )
-        #         return {"estimator": return_estimator, "train_time": train_time}
+                return_estimator, train_time = train_estimator(
+                    X_train=sampled_X_train,
+                    y_train=sampled_y_train,
+                    config_dic=config,
+                    task=self.task,
+                    estimator_name=estimator,
+                    n_jobs=self.n_jobs,
+                    estimator_class=self.learner_classes.get(estimator),
+                    budget=budget,
+                    fit_kwargs=self.fit_kwargs,
+                )
+                return {"estimator": return_estimator, "train_time": train_time}

-        #     if estimator not in self.learner_classes:
-        #         self.learner_classes[estimator] = get_estimator_class(
-        #             self.task, estimator
-        #         )
+            if estimator not in self.learner_classes:
+                self.learner_classes[estimator] = get_estimator_class(
+                    self.task, estimator
+                )

-        #     analysis = tune.run(
-        #         _trainable_function_wrapper,
-        #         config=config_w_resource,
-        #         metric="train_time",
-        #         mode="min",
-        #         resources_per_trial=self.resources_per_trial,
-        #         num_samples=1,
-        #         use_ray=True,
-        #     )
-        #     result = list(analysis.results.values())[0]
-        #     estimator, train_time = result["estimator"], result["train_time"]
-
-        # else:
-        if _is_nlp_task(self.task):
-            use_ray = self.fit_kwargs.get("use_ray")
-            self.fit_kwargs["use_ray"] = False
-        # TODO: limit number of GPUs
-        estimator, train_time = train_estimator(
-            X_train=sampled_X_train,
-            y_train=sampled_y_train,
-            config_dic=config,
-            task=self.task,
-            estimator_name=estimator,
-            n_jobs=self.n_jobs,
-            estimator_class=self.learner_classes.get(estimator),
-            budget=budget,
-            fit_kwargs=self.fit_kwargs,
-        )
-        if _is_nlp_task(self.task):
-            if use_ray is None:
-                del self.fit_kwargs["use_ray"]
-            else:
-                self.fit_kwargs["use_ray"] = use_ray
+            analysis = tune.run(
+                _trainable_function_wrapper,
+                config=config_w_resource,
+                metric="train_time",
+                mode="min",
+                resources_per_trial=self.resources_per_trial,
+                num_samples=1,
+                use_ray=True,
+            )
+            result = list(analysis.results.values())[0]
+            estimator, train_time = result["estimator"], result["train_time"]
+        else:
+            if _is_nlp_task(self.task):
+                use_ray = self.fit_kwargs.get("use_ray")
+                self.fit_kwargs["use_ray"] = False
+            estimator, train_time = train_estimator(
+                X_train=sampled_X_train,
+                y_train=sampled_y_train,
+                config_dic=config,
+                task=self.task,
+                estimator_name=estimator,
+                n_jobs=self.n_jobs,
+                estimator_class=self.learner_classes.get(estimator),
+                budget=budget,
+                fit_kwargs=self.fit_kwargs,
+            )
+            if _is_nlp_task(self.task):
+                if use_ray is None:
+                    del self.fit_kwargs["use_ray"]
+                else:
+                    self.fit_kwargs["use_ray"] = use_ray
         if sampled_weight is not None:
             self.fit_kwargs["sample_weight"] = weight
         return estimator, train_time
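Note: the flaml/automl.py hunk above re-enables a previously commented-out path: when `resources_per_trial` requests a GPU, the training call is wrapped in a trainable and dispatched through `tune.run`, so Ray reserves the GPU for the trial. Below is a minimal sketch of that single-trial dispatch pattern using Ray Tune's legacy API; the trainable, config, and metric are illustrative stand-ins for FLAML's `_trainable_function_wrapper`, and it assumes a Ray installation with an available GPU.

    import time

    from ray import tune

    def trainable(config):
        # Stand-in for training an estimator with `config`; Ray exposes the
        # GPU it reserved for this trial via CUDA_VISIBLE_DEVICES.
        start = time.time()
        return {"train_time": time.time() - start}

    analysis = tune.run(
        trainable,
        config={"learning_rate": 0.1},
        metric="train_time",
        mode="min",
        resources_per_trial={"cpu": 1, "gpu": 1},  # Ray schedules the trial on a GPU
        num_samples=1,
    )
    result = list(analysis.results.values())[0]
    print(result["train_time"])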
diff --git a/flaml/model.py b/flaml/model.py
index e6be73307..15dd96ffc 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -384,6 +384,16 @@ class TransformersEstimator(BaseEstimator):
         else:
             return X, None

+    def _model_init(self, num_labels, per_model_config):
+        from .nlp.utils import load_model
+
+        return load_model(
+            checkpoint_path=self.custom_hpo_args.model_path,
+            task=self._task,
+            num_labels=num_labels,
+            per_model_config=per_model_config,
+        )
+
     def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
         from transformers import EarlyStoppingCallback
         from transformers.trainer_utils import set_seed
@@ -548,17 +558,9 @@ class TransformersEstimator(BaseEstimator):
             **training_args_config,
         )

-        def _model_init():
-            return load_model(
-                checkpoint_path=self.custom_hpo_args.model_path,
-                task=self._task,
-                num_labels=num_labels,
-                per_model_config=per_model_config,
-            )
-
-        self._model = TrainerForAuto(
+        self._trainer = TrainerForAuto(
             args=training_args,
-            model_init=_model_init,
+            model_init=partial(self._model_init, num_labels, per_model_config),
             train_dataset=train_dataset,
             eval_dataset=eval_dataset,
             tokenizer=tokenizer,
@@ -572,20 +574,27 @@ class TransformersEstimator(BaseEstimator):
             callbacks=[EarlyStoppingCallbackForAuto],
         )

-        setattr(self._model, "_use_ray", self.use_ray)
+        setattr(self._trainer, "_use_ray", self.use_ray)
         if self._task in NLG_TASKS:
-            setattr(self._model, "_is_seq2seq", True)
-        self._model.train()
+            setattr(self._trainer, "_is_seq2seq", True)
+        self._trainer.train()

-        self.params[self.ITER_HP] = self._model.state.global_step
-        self._checkpoint_path = self._select_checkpoint(self._model)
+        self.params[self.ITER_HP] = self._trainer.state.global_step
+        self._checkpoint_path = self._select_checkpoint(self._trainer)

         self._kwargs = kwargs
         self._num_labels = num_labels
         self._per_model_config = per_model_config
         self._training_args_config = training_args_config

-        self._ckpt_remains = list(self._model.ckpt_to_metric.keys())
+        self._ckpt_remains = list(self._trainer.ckpt_to_metric.keys())
+        self._model = load_model(
+            checkpoint_path=self._checkpoint_path,
+            task=self._task,
+            num_labels=self._num_labels,
+            per_model_config=self._per_model_config,
+        )
+        self._trainer = None

     def _delete_one_ckpt(self, ckpt_location):
         if self.use_ray is False:
@@ -667,19 +676,12 @@ class TransformersEstimator(BaseEstimator):

     def _init_model_for_predict(self, X_test):
         from datasets import Dataset
-        from .nlp.utils import load_model
         from transformers import AutoTokenizer
         from .nlp.huggingface.trainer import TrainerForAuto
         from .nlp.huggingface.data_collator import DataCollatorForPredict

         X_test, _ = self._preprocess(X_test, **self._kwargs)
         test_dataset = Dataset.from_pandas(X_test)
-        best_model = load_model(
-            checkpoint_path=self._checkpoint_path,
-            task=self._task,
-            num_labels=self._num_labels,
-            per_model_config=self._per_model_config,
-        )
         training_args = self._TrainingArguments(
             per_device_eval_batch_size=1,
             output_dir=self.custom_hpo_args.output_dir,
@@ -688,8 +690,8 @@
         tokenizer = AutoTokenizer.from_pretrained(
             self.custom_hpo_args.model_path, use_fast=True
         )
-        self._model = TrainerForAuto(
-            model=best_model,
+        self._trainer = TrainerForAuto(
+            model=self._model,
             args=training_args,
             data_collator=DataCollatorForPredict(
                 tokenizer=tokenizer,
@@ -706,20 +708,21 @@
         ), "predict_proba() only for classification tasks."

         test_dataset, _ = self._init_model_for_predict(X_test)
-        predictions = self._model.predict(test_dataset)
+        predictions = self._trainer.predict(test_dataset)
+        self._trainer = None
         return predictions.predictions

     def predict(self, X_test):
         test_dataset, training_args = self._init_model_for_predict(X_test)
         if self._task not in NLG_TASKS:
-            predictions = self._model.predict(test_dataset)
+            predictions = self._trainer.predict(test_dataset)
         else:
-            predictions = self._model.predict(
+            predictions = self._trainer.predict(
                 test_dataset,
                 max_length=training_args.generation_max_length,
                 num_beams=training_args.generation_num_beams,
             )
-
+        self._trainer = None
         if self._task == SEQCLASSIFICATION:
             return np.argmax(predictions.predictions, axis=1)
         elif self._task == SEQREGRESSION:
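Note: the flaml/model.py changes split what used to be a single `self._model` attribute into two: the live `transformers.Trainer` (now `self._trainer`, created in `fit`, rebuilt for prediction, and set to None once it has served its purpose) and the best fitted model (`self._model`, loaded from the selected checkpoint and kept on the estimator). Hoisting `_model_init` from a closure inside `fit` to a method wired up with `functools.partial` serves the same apparent goal, picklability: a function defined inside another function cannot be pickled, while a `partial` over a bound method can. A minimal sketch of that distinction (toy class, not FLAML code):

    import pickle
    from functools import partial

    class Estimator:
        def _model_init(self, num_labels, per_model_config):
            return (num_labels, per_model_config)  # stand-in for load_model(...)

        def closure_init(self):
            def _model_init():  # local function, invisible to pickle by name
                return self._model_init(2, None)
            return _model_init

        def partial_init(self):
            return partial(self._model_init, 2, None)

    est = Estimator()
    pickle.dumps(est.partial_init())  # works: the method is referenced by name
    try:
        pickle.dumps(est.closure_init())
    except AttributeError as err:
        # Can't pickle local object 'Estimator.closure_init.<locals>._model_init'
        print(err)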
diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py
index 32d116834..0227b878a 100644
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@@ -1,5 +1,7 @@
 import sys
 import pytest
+import pickle
+import shutil


 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -53,6 +55,7 @@ def test_hf_data():
     automl.fit(
         X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
     )

+    automl = AutoML()
     automl.retrain_from_log(
         X_train=X_train,
@@ -61,7 +64,11 @@
         record_id=0,
         **automl_settings
     )
-
+    with open("automl.pkl", "wb") as f:
+        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
+    with open("automl.pkl", "rb") as f:
+        automl = pickle.load(f)
+    shutil.rmtree("test/data/output/")
     automl.predict(X_test)
     automl.predict(["test test", "test test"])
     automl.predict(
diff --git a/test/nlp/test_autohf_custom_metric.py b/test/nlp/test_autohf_custom_metric.py
index 885dcfe77..6df95b943 100644
--- a/test/nlp/test_autohf_custom_metric.py
+++ b/test/nlp/test_autohf_custom_metric.py
@@ -18,6 +18,12 @@ def custom_metric(
     from datasets import Dataset
     from flaml.model import TransformersEstimator

+    if estimator._trainer is None:
+        estimator._init_model_for_predict(X_test)
+        trainer = estimator._trainer
+        estimator._trainer = None
+    else:
+        trainer = estimator._trainer
     if y_test is not None:
         X_test, _ = estimator._preprocess(X_test)
         eval_dataset = Dataset.from_pandas(TransformersEstimator._join(X_test, y_test))
@@ -25,14 +31,11 @@
         X_test, _ = estimator._preprocess(X_test)
         eval_dataset = Dataset.from_pandas(X_test)

-    trainer = estimator._model
-
     trainer_compute_metrics_cache = trainer.compute_metrics
     trainer.compute_metrics = None

     metrics = trainer.evaluate(eval_dataset)

     trainer.compute_metrics = trainer_compute_metrics_cache
-
     return metrics["eval_loss"], metrics
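Note: the updated tests pin down what the refactor buys. `test_hf_data` now round-trips the fitted AutoML object through pickle and deletes the checkpoint directory before predicting, which works because `_init_model_for_predict` reuses the in-memory `self._model` instead of reloading from disk; the custom-metric test adapts to the new contract by rebuilding a trainer via `_init_model_for_predict` whenever `estimator._trainer` has been cleared. End-to-end, the enabled usage looks roughly like this (a sketch: the data, task, and time budget are placeholders, and a real NLP run needs additional settings such as an output directory):

    import pickle

    from flaml import AutoML

    automl = AutoML()
    automl.fit(X_train=X_train, y_train=y_train, task="seq-classification", time_budget=300)

    # The fitted object now survives a pickle round trip because the estimator
    # holds the best HuggingFace model in memory rather than a live Trainer.
    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    with open("automl.pkl", "rb") as f:
        automl = pickle.load(f)

    print(automl.predict(["an example sentence"]))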
run on mac os") def test_summarization(): from flaml import AutoML from pandas import DataFrame