fixing bug for NER (#463)

* fixing bug for NER

* removing global var

* adding class for trial counter

* adding notebook

* adding use_ray dict

* updating documentation for NLP
Xueqing Liu 2022-03-20 22:03:02 -04:00 committed by GitHub
parent 5f688c1662
commit af423463c3
18 changed files with 1999 additions and 324 deletions


@ -246,6 +246,7 @@ class AutoMLState:
* sample_size
/ state.data_size[0]
)
if _is_nlp_task(state.task):
state.fit_kwargs["X_val"] = state.X_val
@ -326,80 +327,29 @@ class AutoMLState:
weight = None
if groups is not None:
self.fit_kwargs["groups"] = groups
budget = (
None
if self.time_budget is None
else self.time_budget - self.time_from_start
)
if (
hasattr(self, "resources_per_trial")
and self.resources_per_trial.get("gpu", 0) > 0
):
if _is_nlp_task(self.task):
use_ray = self.fit_kwargs.get("use_ray")
self.fit_kwargs["use_ray"] = True
estimator, train_time = train_estimator(
X_train=sampled_X_train,
y_train=sampled_y_train,
config_dic=config,
task=self.task,
estimator_name=estimator,
n_jobs=self.n_jobs,
estimator_class=self.learner_classes.get(estimator),
budget=budget,
fit_kwargs=self.fit_kwargs,
eval_metric="train_time",
)
def _trainable_function_wrapper(config: dict):
return_estimator, train_time = train_estimator(
X_train=sampled_X_train,
y_train=sampled_y_train,
config_dic=config,
task=self.task,
estimator_name=estimator,
n_jobs=self.n_jobs,
estimator_class=self.learner_classes.get(estimator),
budget=budget,
fit_kwargs=self.fit_kwargs,
)
return {"estimator": return_estimator, "train_time": train_time}
if estimator not in self.learner_classes:
self.learner_classes[estimator] = get_estimator_class(
self.task, estimator
)
analysis = tune.run(
_trainable_function_wrapper,
config=config_w_resource,
metric="train_time",
mode="min",
resources_per_trial=self.resources_per_trial,
num_samples=1,
use_ray=True,
)
result = list(analysis.results.values())[0]
estimator, train_time = result["estimator"], result["train_time"]
if _is_nlp_task(self.task):
if use_ray is None:
del self.fit_kwargs["use_ray"]
else:
self.fit_kwargs["use_ray"] = use_ray
estimator.use_ray = False
else:
if _is_nlp_task(self.task):
use_ray = self.fit_kwargs.get("use_ray")
self.fit_kwargs["use_ray"] = False
estimator, train_time = train_estimator(
X_train=sampled_X_train,
y_train=sampled_y_train,
config_dic=config,
task=self.task,
estimator_name=estimator,
n_jobs=self.n_jobs,
estimator_class=self.learner_classes.get(estimator),
budget=budget,
fit_kwargs=self.fit_kwargs,
)
if _is_nlp_task(self.task):
if use_ray is None:
del self.fit_kwargs["use_ray"]
else:
self.fit_kwargs["use_ray"] = use_ray
if sampled_weight is not None:
self.fit_kwargs["sample_weight"] = weight
return estimator, train_time
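The block above temporarily overrides `fit_kwargs["use_ray"]` for the trial and then restores the caller's value; a minimal sketch of the idiom, with hypothetical kwargs:

```python
# minimal sketch of the save/override/restore idiom used above
fit_kwargs = {"metric": "accuracy"}  # hypothetical kwargs
saved = fit_kwargs.get("use_ray")    # None when the key is absent
fit_kwargs["use_ray"] = True         # temporary override for this trial
try:
    ...  # train the estimator here
finally:
    if saved is None:
        del fit_kwargs["use_ray"]    # the key did not exist before
    else:
        fit_kwargs["use_ray"] = saved
```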
@ -749,7 +699,11 @@ class AutoML(BaseEstimator):
"""Time taken to find best model in seconds."""
return self.__dict__.get("_time_taken_best_iter")
def predict(self, X: Union[np.array, pd.DataFrame, List[str], List[List[str]]]):
def predict(
self,
X: Union[np.array, pd.DataFrame, List[str], List[List[str]]],
**pred_kwargs,
):
"""Predict label from features.
Args:
@ -761,6 +715,8 @@ class AutoML(BaseEstimator):
arima or sarimax). Other columns in the dataframe
are assumed to be exogenous variables (categorical
or numeric).
**pred_kwargs: Other keyword arguments to pass to the predict() function of
the searched learners, such as per_device_eval_batch_size.
```python
multivariate_X_test = pd.DataFrame({
@ -782,7 +738,7 @@ class AutoML(BaseEstimator):
)
return None
X = self._preprocess(X)
y_pred = estimator.predict(X)
y_pred = estimator.predict(X, **pred_kwargs)
if (
isinstance(y_pred, np.ndarray)
and y_pred.ndim > 1
@ -796,12 +752,14 @@ class AutoML(BaseEstimator):
else:
return y_pred
def predict_proba(self, X):
def predict_proba(self, X, **pred_kwargs):
"""Predict the probability of each class from features, only works for
classification problems.
Args:
X: A numpy array of featurized instances, shape n * m.
**pred_kwargs: Other keyword arguments to pass to the predict_proba() function of
the searched learners, such as per_device_eval_batch_size.
Returns:
A numpy array of shape n * c. c is the # classes. Each element at
@ -814,7 +772,7 @@ class AutoML(BaseEstimator):
)
return None
X = self._preprocess(X)
proba = self._trained_estimator.predict_proba(X)
proba = self._trained_estimator.predict_proba(X, **pred_kwargs)
return proba
def _preprocess(self, X):
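A usage sketch for the new `**pred_kwargs` plumbing (hypothetical; assumes `automl` was fit on an NLP classification task; per `_update_hf_args` in the transformers estimator, model-specific overrides travel under the `hf_args` key):

```python
# hypothetical usage of the new pred_kwargs pass-through
preds = automl.predict(X_test, hf_args={"per_device_eval_batch_size": 32})
probas = automl.predict_proba(X_test, hf_args={"per_device_eval_batch_size": 32})
```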
@ -1319,6 +1277,7 @@ class AutoML(BaseEstimator):
task=task,
estimator_name=estimator,
estimator_class=self._state.learner_classes.get(estimator),
eval_metric="train_time",
)
return estimator
@ -1680,6 +1639,17 @@ class AutoML(BaseEstimator):
"""
return self._state.data_size[0] if self._sample else None
def pickle(self, output_file_name):
import pickle
# detach the per-estimator training functions, which are not picklable,
# before serializing the AutoML object
estimator_to_training_function = {}
for estimator in self.estimator_list:
search_state = self._search_states[estimator]
estimator_to_training_function[estimator] = search_state.training_function
del search_state.training_function
with open(output_file_name, "wb") as f:
pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
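Round-tripping, as exercised by the updated test:

```python
import pickle

automl.pickle("automl.pkl")  # detaches unpicklable training functions, then dumps
with open("automl.pkl", "rb") as f:
    automl = pickle.load(f)
```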
@property
def trainable(self) -> Callable[[dict], Optional[float]]:
"""Training function.
@ -1960,10 +1930,10 @@ class AutoML(BaseEstimator):
augment rare classes.
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
size when sample=True.
use_ray: boolean, default=False | Whether to use ray to run the training
use_ray: boolean or dict
If boolean: default=False | Whether to use ray to run the training
in separate processes. This can be used to prevent OOM for large
datasets, but will incur more overhead in time. Only use it if
you run into OOM failures.
datasets, but will incur more overhead in time.
If dict: the dict contains the keyword arguments to pass to ray.tune.run,
e.g., {"local_dir": "data/output/"}.
metric_constraints: list, default=[] | The list of metric constraints.
Each element in this list is a 3-tuple, which shall be expressed
in the following format: the first element of the 3-tuple is the name of the
@ -2064,14 +2034,21 @@ class AutoML(BaseEstimator):
import ray
n_cpus = use_ray and ray.available_resources()["CPU"] or os.cpu_count()
self._state.resources_per_trial = (
# when using gpu, default cpu is 1 per job; otherwise, default cpu is n_cpus / n_concurrent_trials
{"cpu": max(int(n_cpus / n_concurrent_trials), 1), "gpu": gpu_per_trial}
if gpu_per_trial == 0
else {"cpu": 1, "gpu": gpu_per_trial}
(
{
"cpu": max(int((n_cpus - 2) / 2 / n_concurrent_trials), 1),
"gpu": gpu_per_trial,
}
if gpu_per_trial == 0
else {"cpu": 1, "gpu": gpu_per_trial}
)
if n_jobs < 0
else {"cpu": n_jobs, "gpu": gpu_per_trial}
)
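A worked example of the new default CPU allocation, assuming 16 available CPUs, 2 concurrent trials, no GPU, and n_jobs < 0:

```python
# assumed values; 2 CPUs are held back and half of the rest is split across trials
n_cpus, n_concurrent_trials = 16, 2
cpu_per_trial = max(int((n_cpus - 2) / 2 / n_concurrent_trials), 1)  # int(3.5) -> 3
resources_per_trial = {"cpu": cpu_per_trial, "gpu": 0}
```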
if isinstance(X_train, ray.ObjectRef):
X_train = ray.get(X_train)
elif isinstance(dataframe, ray.ObjectRef):
@ -2131,7 +2108,11 @@ class AutoML(BaseEstimator):
)
)
if "auto" == metric:
if "binary" in self._state.task:
if _is_nlp_task(self._state.task):
from .nlp.utils import load_default_huggingface_metric_for_task
metric = load_default_huggingface_metric_for_task(self._state.task)
elif "binary" in self._state.task:
metric = "roc_auc"
elif "multi" in self._state.task:
metric = "log_loss"
@ -2139,17 +2120,9 @@ class AutoML(BaseEstimator):
metric = "mape"
elif self._state.task == "rank":
metric = "ndcg"
elif _is_nlp_task(self._state.task):
from .nlp.utils import load_default_huggingface_metric_for_task
metric = load_default_huggingface_metric_for_task(self._state.task)
else:
metric = "r2"
if _is_nlp_task(self._state.task):
self._state.fit_kwargs["metric"] = metric
self._state.fit_kwargs["use_ray"] = self._use_ray
self._state.metric = metric
def is_to_reverse_metric(metric, task):
@ -2355,6 +2328,14 @@ class AutoML(BaseEstimator):
elif "random" == self._hpo_method:
from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
from ray.tune.sample import Domain
elif "optuna" == self._hpo_method:
try:
from ray import __version__ as ray_version
assert ray_version >= "1.0.0"
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
except (ImportError, AssertionError):
from .searcher.suggestion import OptunaSearch as SearchAlgo
else:
raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. "
@ -2382,24 +2363,48 @@ class AutoML(BaseEstimator):
else:
self._state.time_from_start = time.time() - self._start_time_flag
time_left = self._state.time_budget - self._state.time_from_start
search_alg = SearchAlgo(
metric="val_loss",
space=space,
low_cost_partial_config=self.low_cost_partial_config,
points_to_evaluate=self.points_to_evaluate,
cat_hp_cost=self.cat_hp_cost,
resource_attr=self.resource_attr,
min_resource=self.min_resource,
max_resource=self.max_resource,
config_constraints=[
(partial(size, self._state), "<=", self._mem_thres)
],
metric_constraints=self.metric_constraints,
seed=self._seed,
time_budget_s=time_left,
)
if self._hpo_method != "optuna":
search_alg = SearchAlgo(
metric="val_loss",
space=space,
low_cost_partial_config=self.low_cost_partial_config,
points_to_evaluate=self.points_to_evaluate,
cat_hp_cost=self.cat_hp_cost,
resource_attr=self.resource_attr,
min_resource=self.min_resource,
max_resource=self.max_resource,
config_constraints=[
(partial(size, self._state), "<=", self._mem_thres)
],
metric_constraints=self.metric_constraints,
seed=self._seed,
time_budget_s=time_left,
)
else:
# when self._hpo_method is optuna, the converted search space and the initial configs can have mismatched dimensions;
# remove the extra keys from the initial configs (points_to_evaluate) to stay consistent with the converted search space
converted_space = SearchAlgo.convert_search_space(space)
removed_keys = set(space.keys()).difference(converted_space.keys())
new_points_to_evaluate = []
for idx in range(len(self.points_to_evaluate)):
r = self.points_to_evaluate[idx].copy()
for each_key in removed_keys:
r.pop(each_key, None)  # tolerate points that lack the removed key
new_points_to_evaluate.append(r)
search_alg = SearchAlgo(
metric="val_loss",
mode="min",
points_to_evaluate=[
p
for p in new_points_to_evaluate
if len(p) == len(converted_space)
],
)
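A worked sketch of the pruning above, with an assumed toy space in which one key is dropped by the conversion:

```python
# assumed toy example: one key vanishes in the converted space
space_keys = {"learning_rate", "num_train_epochs", "global_max_steps"}
converted_keys = {"learning_rate", "num_train_epochs"}
removed_keys = space_keys - converted_keys  # {"global_max_steps"}
point = {"learning_rate": 1e-5, "num_train_epochs": 3, "global_max_steps": 10000}
pruned = {k: v for k, v in point.items() if k not in removed_keys}
# pruned == {"learning_rate": 1e-05, "num_train_epochs": 3}
```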
search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
resources_per_trial = self._state.resources_per_trial
analysis = ray.tune.run(
self.trainable,
search_alg=search_alg,
@ -2413,6 +2418,7 @@ class AutoML(BaseEstimator):
raise_on_failed_trial=False,
keep_checkpoints_num=1,
checkpoint_score_attr="min-val_loss",
**self._use_ray if isinstance(self._use_ray, dict) else {},
)
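When `use_ray` is a dict, the `**self._use_ray` expansion above forwards its entries to `ray.tune.run` as keyword arguments; the updated regression test uses it like this:

```python
# from the updated test: extra ray.tune.run kwargs ride along in use_ray
automl_settings["use_ray"] = {"local_dir": "data/output/"}
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
```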
# logger.info([trial.last_result for trial in analysis.trials])
trials = sorted(
@ -2579,6 +2585,7 @@ class AutoML(BaseEstimator):
if isinstance(search_state.init_config, list)
else [search_state.init_config]
)
low_cost_partial_config = search_state.low_cost_partial_config
if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"):
algo = SearchAlgo(
@ -2598,6 +2605,20 @@ class AutoML(BaseEstimator):
seed=self._seed,
)
else:
# for hpo methods that fall through to this branch (e.g., optuna), the converted search space and the initial configs can have mismatched dimensions;
# remove the extra keys from the initial configs to stay consistent with the converted search space
converted_space = SearchAlgo.convert_search_space(search_space)
removed_keys = set(search_space.keys()).difference(
converted_space.keys()
)
new_points_to_evaluate = []
for idx in range(len(points_to_evaluate)):
r = points_to_evaluate[idx].copy()
for each_key in removed_keys:
r.pop(each_key, None)  # tolerate points that lack the removed key
new_points_to_evaluate.append(r)
points_to_evaluate = new_points_to_evaluate
algo = SearchAlgo(
metric="val_loss",
mode="min",


@ -397,6 +397,7 @@ def get_val_loss(
# fit_kwargs['groups_val'] = groups_val
# fit_kwargs['X_val'] = X_val
# fit_kwargs['y_val'] = y_val
estimator.fit(X_train, y_train, budget, **fit_kwargs)
val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
config,
@ -561,6 +562,10 @@ def compute_estimator(
task=task,
n_jobs=n_jobs,
)
if isinstance(estimator, TransformersEstimator):
fit_kwargs["metric"] = eval_metric
if "holdout" == eval_method:
val_loss, metric_for_logging, train_time, pred_time = get_val_loss(
config_dic,
@ -604,6 +609,7 @@ def train_estimator(
estimator_class=None,
budget=None,
fit_kwargs={},
eval_metric=None,
):
start_time = time.time()
estimator_class = estimator_class or get_estimator_class(task, estimator_name)
@ -612,6 +618,9 @@ def train_estimator(
task=task,
n_jobs=n_jobs,
)
if isinstance(estimator, TransformersEstimator):
fit_kwargs["metric"] = eval_metric
if X_train is not None:
train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
else:


@ -197,7 +197,7 @@ class BaseEstimator:
train_time = self._fit(X_train, y_train, **kwargs)
return train_time
def predict(self, X):
def predict(self, X, **kwargs):
"""Predict label from features.
Args:
@ -216,7 +216,7 @@ class BaseEstimator:
)
return np.ones(X.shape[0])
def predict_proba(self, X):
def predict_proba(self, X, **kwargs):
"""Predict the probability of each class from features.
Only works for classification problems
@ -325,7 +325,7 @@ class TransformersEstimator(BaseEstimator):
},
"num_train_epochs": {
"domain": tune.loguniform(lower=0.1, upper=10.0),
"init_value": 3,
"init_value": 1,
},
"per_device_train_batch_size": {
"domain": tune.choice([4, 8, 16, 32]),
@ -344,33 +344,38 @@ class TransformersEstimator(BaseEstimator):
"init_value": 1e-6,
},
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
"global_max_steps": {
"domain": sys.maxsize,
"init_value": sys.maxsize,
},
}
if task in NLG_TASKS:
search_space_dict["generation_num_beams"] = {
"domain": tune.randint(2, 5),
"init_value": 3,
}
search_space_dict["generation_max_length"] = {
"domain": tune.choice([16, 32, 64, 128]),
"init_value": 64,
}
return search_space_dict
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
from .nlp.utils import HPOArgs
def _init_hf_args(self, automl_fit_kwargs: dict = None):
from .nlp.utils import HFArgs
custom_hpo_args = HPOArgs()
for key, val in automl_fit_kwargs["custom_hpo_args"].items():
hf_args = HFArgs()
for key, val in automl_fit_kwargs["hf_args"].items():
assert (
key in custom_hpo_args.__dict__
), "The specified key {} is not in the argument list of flaml.nlp.utils::HPOArgs".format(
key in hf_args.__dict__
), "The specified key {} is not in the argument list of flaml.nlp.utils::HFArgs".format(
key
)
setattr(custom_hpo_args, key, val)
self.custom_hpo_args = custom_hpo_args
setattr(hf_args, key, val)
self.hf_args = hf_args
def _update_hf_args(self, automl_pred_kwargs: dict = None):
if automl_pred_kwargs:
hf_args = automl_pred_kwargs.get("hf_args")
if hf_args:
for key, val in hf_args.items():
assert (
key in self.hf_args.__dict__
), "The specified key {} is not in the argument list of flaml.nlp.utils::HFArgs".format(
key
)
setattr(self.hf_args, key, val)
def _preprocess(self, X, y=None, **kwargs):
from .nlp.utils import tokenize_text, is_a_list_of_str
@ -383,7 +388,7 @@ class TransformersEstimator(BaseEstimator):
X=X,
Y=y,
task=self._task,
custom_hpo_args=self.custom_hpo_args,
hf_args=self.hf_args,
tokenizer=self._tokenizer,
)
else:
@ -392,12 +397,63 @@ class TransformersEstimator(BaseEstimator):
def _model_init(self, num_labels, per_model_config):
from .nlp.utils import load_model
return load_model(
checkpoint_path=self.custom_hpo_args.model_path,
this_model = load_model(
checkpoint_path=self.hf_args.model_path,
task=self._task,
num_labels=num_labels,
per_model_config=per_model_config,
)
return this_model
def _get_training_args(self, local_rank=-1):
import transformers
if self._task in NLG_TASKS:
self._training_args_config["predict_with_generate"] = True
if transformers.__version__.startswith("3"):
training_args = self._TrainingArguments(
report_to=[],
output_dir=self._trial_dir,
do_train=True,
do_eval=True,
eval_steps=self._ckpt_freq,
evaluate_during_training=True,
save_steps=self._ckpt_freq,
logging_steps=self._ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.hf_args.fp16
if self._kwargs.get("gpu_per_trial", 0) > 0
else False,
no_cuda=True if self._kwargs.get("gpu_per_trial") == 0 else False,
local_rank=local_rank,
per_device_eval_batch_size=self.hf_args.per_device_eval_batch_size,
**self._training_args_config,
)
else:
from transformers import IntervalStrategy
training_args = self._TrainingArguments(
report_to=[],
output_dir=self._trial_dir,
do_train=True,
do_eval=True,
eval_steps=self._ckpt_freq,
logging_steps=self._ckpt_freq,
evaluation_strategy=IntervalStrategy.STEPS,
save_steps=self._ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.hf_args.fp16
if self._kwargs.get("gpu_per_trial", 0) > 0
else False,
local_rank=local_rank,
no_cuda=True if self._kwargs.get("gpu_per_trial") == 0 else False,
per_device_eval_batch_size=self.hf_args.per_device_eval_batch_size,
**self._training_args_config,
)
return training_args
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
import transformers
@ -411,18 +467,11 @@ class TransformersEstimator(BaseEstimator):
from .nlp.utils import (
get_num_labels,
separate_config,
load_model,
compute_checkpoint_freq,
get_trial_fold_name,
Counter,
date_str,
)
# TODO: when self._task == QUESTIONANSWERING is supported, uncomment the block
# below and indent the TrainerForAuto import accordingly:
# if self._task in NLG_TASKS:
# from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
# else:
from .nlp.huggingface.trainer import TrainerForAuto
from .nlp.huggingface.data_collator import DataCollatorForAuto
from .nlp.utils import get_auto_tokenizer
@ -462,13 +511,22 @@ class TransformersEstimator(BaseEstimator):
set_seed(self.params.get("seed", self._TrainingArguments.seed))
self._init_hpo_args(kwargs)
self._init_hf_args(kwargs)
self._tokenizer = get_auto_tokenizer(
self.custom_hpo_args.model_path, self._task
self.hf_args.tokenizer_model_path
if self.hf_args.tokenizer_model_path
else self.hf_args.model_path,
self._task,
)
self._metric = kwargs["metric"]
self.use_ray = kwargs.get("use_ray")
try:
from ray.tune import is_session_enabled
self.use_ray = is_session_enabled()
except ImportError:
self.use_ray = False
X_val = kwargs.get("X_val")
y_val = kwargs.get("y_val")
@ -498,70 +556,41 @@ class TransformersEstimator(BaseEstimator):
eval_dataset = None
num_labels = get_num_labels(self._task, self._y_train)
training_args_config, per_model_config = separate_config(
self._training_args_config, self._per_model_config = separate_config(
self.params, self._task
)
ckpt_freq = compute_checkpoint_freq(
self._ckpt_freq = compute_checkpoint_freq(
train_data_size=len(self._X_train),
custom_hpo_args=self.custom_hpo_args,
num_train_epochs=training_args_config.get(
hf_args=self.hf_args,
num_train_epochs=self._training_args_config.get(
"num_train_epochs", self._TrainingArguments.num_train_epochs
),
batch_size=training_args_config.get(
batch_size=self._training_args_config.get(
"per_device_train_batch_size",
self._TrainingArguments.per_device_train_batch_size,
),
)
local_dir = os.path.join(
self.custom_hpo_args.output_dir, "train_{}".format(date_str())
)
local_dir = os.path.join(self.hf_args.output_dir, "train_{}".format(date_str()))
if not self.use_ray:
# if self.params = {}, don't include configuration in trial fold name
trial_dir = get_trial_fold_name(local_dir, self.params, self.trial_id)
else:
if self.use_ray is True:
import ray
trial_dir = ray.tune.get_trial_dir()
if transformers.__version__.startswith("3"):
training_args = self._TrainingArguments(
report_to=[],
output_dir=trial_dir,
do_train=True,
do_eval=True,
eval_steps=ckpt_freq,
evaluate_during_training=True,
save_steps=ckpt_freq,
logging_steps=ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.custom_hpo_args.fp16,
**training_args_config,
)
self._trial_dir = ray.tune.get_trial_dir()
else:
from transformers import IntervalStrategy
training_args = self._TrainingArguments(
report_to=[],
output_dir=trial_dir,
do_train=True,
do_eval=True,
per_device_eval_batch_size=1,
eval_steps=ckpt_freq,
logging_steps=ckpt_freq,
evaluation_strategy=IntervalStrategy.STEPS,
save_steps=ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.custom_hpo_args.fp16,
**training_args_config,
# if self.params is empty, don't include the configuration in the trial fold name
self._trial_dir = Counter.get_trial_fold_name(
local_dir, self.params, self.trial_id
)
self._kwargs = kwargs
self._num_labels = num_labels
training_args = self._get_training_args(local_rank=-1)
self._trainer = TrainerForAuto(
args=training_args,
model_init=partial(self._model_init, num_labels, per_model_config),
model_init=partial(self._model_init, num_labels, self._per_model_config),
train_dataset=train_dataset,
eval_dataset=eval_dataset,
tokenizer=self._tokenizer,
@ -575,28 +604,33 @@ class TransformersEstimator(BaseEstimator):
callbacks=[EarlyStoppingCallbackForAuto],
)
setattr(self._trainer, "_use_ray", self.use_ray)
if self._task in NLG_TASKS:
setattr(self._trainer, "_is_seq2seq", True)
if kwargs.get("gpu_per_trial"):
self._trainer.args._n_gpu = kwargs.get("gpu_per_trial")
gpu_per_trial = kwargs.get("gpu_per_trial", None)
if gpu_per_trial:
# remember the caller's device list so it can be restored after training
tmp_cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
self._trainer.args._n_gpu = gpu_per_trial
# expose exactly gpu_per_trial devices to this trial
if tmp_cuda_visible_devices.count(",") != gpu_per_trial - 1:
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
[str(x) for x in range(gpu_per_trial)]
)
import time
start_time = time.time()
self._trainer.train()
if gpu_per_trial:
os.environ["CUDA_VISIBLE_DEVICES"] = tmp_cuda_visible_devices
self.params[self.ITER_HP] = self._trainer.state.global_step
self._checkpoint_path = self._select_checkpoint(self._trainer)
self._kwargs = kwargs
self._num_labels = num_labels
self._per_model_config = per_model_config
self._training_args_config = training_args_config
self._ckpt_remains = list(self._trainer.ckpt_to_metric.keys())
self._model = load_model(
checkpoint_path=self._checkpoint_path,
task=self._task,
num_labels=self._num_labels,
per_model_config=self._per_model_config,
)
if hasattr(self._trainer, "intermediate_results"):
self.intermediate_results = [
x[1]
@ -605,6 +639,7 @@ class TransformersEstimator(BaseEstimator):
)
]
self._trainer = None
return time.time() - start_time
def _delete_one_ckpt(self, ckpt_location):
if self.use_ray is False:
@ -689,16 +724,21 @@ class TransformersEstimator(BaseEstimator):
from datasets import Dataset
from .nlp.huggingface.trainer import TrainerForAuto
from .nlp.huggingface.data_collator import DataCollatorForPredict
from .nlp.utils import load_model
X_test, _ = self._preprocess(X_test, **self._kwargs)
test_dataset = Dataset.from_pandas(X_test)
training_args = self._TrainingArguments(
per_device_eval_batch_size=1,
output_dir=self.custom_hpo_args.output_dir,
**self._training_args_config,
this_model = load_model(
checkpoint_path=self._checkpoint_path,
task=self._task,
num_labels=self._num_labels,
per_model_config=self._per_model_config,
)
self._trainer = TrainerForAuto(
model=self._model,
training_args = self._get_training_args(local_rank=-1)
new_trainer = TrainerForAuto(
model=this_model,
args=training_args,
data_collator=DataCollatorForPredict(
tokenizer=self._tokenizer,
@ -708,31 +748,36 @@ class TransformersEstimator(BaseEstimator):
else None,
compute_metrics=self._compute_metrics_by_dataset_name,
)
return test_dataset, training_args
if self._task in NLG_TASKS:
setattr(new_trainer, "_is_seq2seq", True)
return new_trainer, test_dataset, training_args
def predict_proba(self, X):
def predict_proba(self, X, **kwargs):
self._update_hf_args(kwargs)
assert (
self._task in CLASSIFICATION
), "predict_proba() only for classification tasks."
test_dataset, _ = self._init_model_for_predict(X)
predictions = self._trainer.predict(test_dataset)
if self.use_ray is True:
self._trainer = None
new_trainer, test_dataset, _ = self._init_model_for_predict(X)
predictions = new_trainer.predict(test_dataset)
return predictions.predictions
def predict(self, X):
test_dataset, training_args = self._init_model_for_predict(X)
def predict(self, X, **kwargs):
import transformers
transformers.logging.set_verbosity_error()
self._update_hf_args(kwargs)
new_trainer, test_dataset, training_args = self._init_model_for_predict(X)
if self._task not in NLG_TASKS:
predictions = self._trainer.predict(test_dataset)
predictions = new_trainer.predict(test_dataset)
else:
predictions = self._trainer.predict(
predictions = new_trainer.predict(
test_dataset,
max_length=training_args.generation_max_length,
num_beams=training_args.generation_num_beams,
metric_key_prefix="predict",
)
if self.use_ray is True:
self._trainer = None
if self._task == SEQCLASSIFICATION:
return np.argmax(predictions.predictions, axis=1)
elif self._task == SEQREGRESSION:
@ -740,10 +785,8 @@ class TransformersEstimator(BaseEstimator):
elif self._task == TOKENCLASSIFICATION:
return np.argmax(predictions.predictions, axis=2)
elif self._task == SUMMARIZATION:
if isinstance(predictions.predictions, tuple):
predictions = np.argmax(predictions.predictions[0], axis=2)
decoded_preds = self._tokenizer.batch_decode(
predictions, skip_special_tokens=True
predictions.predictions, skip_special_tokens=True
)
return decoded_preds
elif self._task == MULTICHOICECLASSIFICATION:
@ -1121,7 +1164,7 @@ class XGBoostEstimator(SKLearnEstimator):
train_time = time.time() - start_time
return train_time
def predict(self, X):
def predict(self, X, **kwargs):
import xgboost as xgb
if not issparse(X):
@ -1617,7 +1660,7 @@ class Prophet(SKLearnEstimator):
self._model = model
return train_time
def predict(self, X):
def predict(self, X, **kwargs):
if isinstance(X, int):
raise ValueError(
"predict() with steps is only supported for arima/sarimax."
@ -1697,7 +1740,7 @@ class ARIMA(Prophet):
self._model = model
return train_time
def predict(self, X):
def predict(self, X, **kwargs):
if self._model is not None:
if isinstance(X, int):
forecast = self._model.forecast(steps=X)
@ -1894,7 +1937,7 @@ class TS_SKLearn(SKLearnEstimator):
train_time = time.time() - current_time
return train_time
def predict(self, X):
def predict(self, X, **kwargs):
if self._model is not None:
X = self.transform_X(X)
X = self._preprocess(X)


@ -2,6 +2,7 @@ import argparse
from dataclasses import dataclass, field
from itertools import chain
from typing import Dict, Any
import numpy as np
from ..data import (
SUMMARIZATION,
@ -20,61 +21,54 @@ def load_default_huggingface_metric_for_task(task):
elif task == SEQREGRESSION:
return "r2"
elif task == SUMMARIZATION:
return "rouge"
return "rouge1"
elif task == MULTICHOICECLASSIFICATION:
return "accuracy"
elif task == TOKENCLASSIFICATION:
return "seqeval"
global tokenized_column_names
def get_auto_tokenizer(model_path, task):
def get_auto_tokenizer(tokenizer_model_path, task):
from transformers import AutoTokenizer
if task == SUMMARIZATION:
return AutoTokenizer.from_pretrained(
model_path, # 'roberta-base'
pretrained_model_name_or_path=tokenizer_model_path,
cache_dir=None,
use_fast=True,
revision="main",
use_auth_token=None,
)
else:
return AutoTokenizer.from_pretrained(model_path, use_fast=True)
return AutoTokenizer.from_pretrained(tokenizer_model_path, use_fast=True)
def tokenize_text(X, Y=None, task=None, custom_hpo_args=None, tokenizer=None):
def tokenize_text(X, Y=None, task=None, hf_args=None, tokenizer=None):
if task in (SEQCLASSIFICATION, SEQREGRESSION):
X_tokenized = tokenize_onedataframe(
X,
tokenizer=tokenizer,
task=task,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
prefix_str="",
)
return X_tokenized, None
elif task == TOKENCLASSIFICATION:
return tokenize_text_tokclassification(
X, Y, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args
X, Y, tokenizer=tokenizer, hf_args=hf_args
)
elif task in NLG_TASKS:
return tokenize_seq2seq(
X, Y, tokenizer=tokenizer, task=task, custom_hpo_args=custom_hpo_args
)
return tokenize_seq2seq(X, Y, tokenizer=tokenizer, task=task, hf_args=hf_args)
elif task == MULTICHOICECLASSIFICATION:
return tokenize_text_multiplechoice(
X, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args
)
return tokenize_text_multiplechoice(X, tokenizer=tokenizer, hf_args=hf_args)
def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
def tokenize_seq2seq(X, Y, tokenizer, task=None, hf_args=None):
model_inputs = tokenize_onedataframe(
X,
tokenizer=tokenizer,
task=task,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
prefix_str="summarize: ",
)
labels = None
@ -83,7 +77,7 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
Y.to_frame(),
tokenizer=tokenizer,
task=task,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
prefix_str="",
)
labels["label"] = [
@ -97,15 +91,18 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
def tokenize_and_align_labels(
examples, tokenizer, custom_hpo_args=None, X_sent_key=None, Y_sent_key=None
examples,
tokenizer,
hf_args=None,
X_sent_key=None,
Y_sent_key=None,
return_column_name=False,
):
global tokenized_column_names
tokenized_inputs = tokenizer(
[list(examples[X_sent_key])],
padding="max_length",
truncation=True,
max_length=custom_hpo_args.max_seq_length,
max_length=hf_args.max_seq_length,
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
is_split_into_words=True,
)
@ -134,27 +131,37 @@ def tokenize_and_align_labels(
# label_ids.append(b_to_i_label[label_to_id[label[word_idx]]])
previous_word_idx = word_idx
tokenized_inputs["label"] = label_ids
tokenized_column_names = sorted(tokenized_inputs.keys())
tokenized_input_and_labels = [tokenized_inputs[x] for x in tokenized_column_names]
for key_idx, each_key in enumerate(tokenized_column_names):
tmp_column_names = sorted(tokenized_inputs.keys())
tokenized_input_and_labels = [tokenized_inputs[x] for x in tmp_column_names]
for key_idx, each_key in enumerate(tmp_column_names):
if each_key != "label":
tokenized_input_and_labels[key_idx] = tokenized_input_and_labels[key_idx][0]
return tokenized_input_and_labels
if return_column_name:
return tokenized_input_and_labels, tmp_column_names
else:
return tokenized_input_and_labels
def tokenize_text_tokclassification(X, Y, tokenizer, custom_hpo_args=None):
def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None):
import pandas as pd
global tokenized_column_names
if Y is not None:
X_and_Y = pd.concat([X, Y.to_frame()], axis=1)
X_key = list(X.keys())[0]
Y_key = list(Y.to_frame().keys())[0]
_, tokenized_column_names = tokenize_and_align_labels(
X_and_Y.iloc[0],
tokenizer=tokenizer,
hf_args=hf_args,
X_sent_key=X_key,
Y_sent_key=Y_key,
return_column_name=True,
)
X_and_Y_tokenized = X_and_Y.apply(
lambda x: tokenize_and_align_labels(
x,
tokenizer=tokenizer,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
X_sent_key=X_key,
Y_sent_key=Y_key,
),
@ -170,11 +177,21 @@ def tokenize_text_tokclassification(X, Y, tokenizer, custom_hpo_args=None):
y_tokenized = X_and_Y_tokenized.iloc[:, label_idx]
else:
X_key = list(X.keys())[0]
_, tokenized_column_names = tokenize_and_align_labels(
X.iloc[0],
tokenizer=tokenizer,
hf_args=hf_args,
X_sent_key=X_key,
Y_sent_key=None,
return_column_name=True,
)
d = X.apply(
lambda x: tokenize_and_align_labels(
x,
tokenizer=tokenizer,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
X_sent_key=X_key,
Y_sent_key=None,
),
@ -192,28 +209,34 @@ def tokenize_onedataframe(
X,
tokenizer,
task=None,
custom_hpo_args=None,
hf_args=None,
prefix_str=None,
):
import pandas
global tokenized_column_names
with tokenizer.as_target_tokenizer():
_, tokenized_column_names = tokenize_row(
dict(X.iloc[0]),
tokenizer,
prefix=(prefix_str,) if task is SUMMARIZATION else None,
task=task,
hf_args=hf_args,
return_column_name=True,
)
d = X.apply(
lambda x: tokenize_row(
x,
tokenizer,
prefix=(prefix_str,) if task is SUMMARIZATION else None,
task=task,
custom_hpo_args=custom_hpo_args,
hf_args=hf_args,
),
axis=1,
result_type="expand",
)
X_tokenized = pandas.DataFrame(columns=tokenized_column_names)
X_tokenized[tokenized_column_names] = d
return X_tokenized
X_tokenized = pandas.DataFrame(columns=tokenized_column_names)
X_tokenized[tokenized_column_names] = d
return X_tokenized
def postprocess_text(preds, labels):
@ -230,35 +253,49 @@ def postprocess_text(preds, labels):
return preds, labels
def tokenize_row(this_row, tokenizer, prefix=None, task=None, custom_hpo_args=None):
global tokenized_column_names
def tokenize_row(
this_row,
tokenizer,
prefix=None,
task=None,
hf_args=None,
return_column_name=False,
):
assert (
"max_seq_length" in custom_hpo_args.__dict__
"max_seq_length" in hf_args.__dict__
), "max_seq_length must be provided for glue"
if prefix:
this_row = tuple(["".join(x) for x in zip(prefix, this_row)])
# tokenizer.pad_token = tokenizer.eos_token
tokenized_example = tokenizer(
*tuple(this_row),
padding="max_length",
max_length=custom_hpo_args.max_seq_length,
max_length=hf_args.max_seq_length,
truncation=True,
)
if task in NLG_TASKS:
tokenized_example["decoder_input_ids"] = tokenized_example["input_ids"]
tokenized_column_names = sorted(tokenized_example.keys())
return [tokenized_example[x] for x in tokenized_column_names]
tmp_column_names = sorted(tokenized_example.keys())
if return_column_name:
return [tokenized_example[x] for x in tmp_column_names], tmp_column_names
else:
return [tokenized_example[x] for x in tmp_column_names]
def tokenize_text_multiplechoice(X, tokenizer, custom_hpo_args=None):
def tokenize_text_multiplechoice(X, tokenizer, hf_args=None):
import pandas
global tokenized_column_names
t = X[["sent1", "sent2", "ending0", "ending1", "ending2", "ending3"]]
_, tokenized_column_names = tokenize_swag(
t.iloc[0],
tokenizer=tokenizer,
hf_args=hf_args,
return_column_name=True,
)
d = t.apply(
lambda x: tokenize_swag(x, tokenizer, custom_hpo_args),
lambda x: tokenize_swag(x, tokenizer=tokenizer, hf_args=hf_args),
axis=1,
result_type="expand",
)
@ -269,9 +306,7 @@ def tokenize_text_multiplechoice(X, tokenizer, custom_hpo_args=None):
return output, None
def tokenize_swag(this_row, tokenizer, custom_hpo_args=None):
global tokenized_column_names
def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
first_sentences = [[this_row["sent1"]] * 4]
# repeat the first sentence 4 times, once for each candidate ending
question_headers = this_row["sent2"]
@ -289,11 +324,15 @@ def tokenize_swag(this_row, tokenizer, custom_hpo_args=None):
tokenized_example = tokenizer(
*tuple([first_sentences, second_sentences]),
truncation=True,
max_length=custom_hpo_args.max_seq_length,
max_length=hf_args.max_seq_length,
padding=False,
)
tokenized_column_names = sorted(tokenized_example.keys())
return [tokenized_example[x] for x in tokenized_column_names]
tmp_column_names = sorted(tokenized_example.keys())
if return_column_name:
return [tokenized_example[x] for x in tmp_column_names], tmp_column_names
else:
return [tokenized_example[x] for x in tmp_column_names]
def separate_config(config, task):
@ -333,7 +372,9 @@ def get_num_labels(task, y_train):
def is_a_list_of_str(this_obj):
return isinstance(this_obj, list) and all(isinstance(x, str) for x in this_obj)
return isinstance(this_obj, (list, np.ndarray)) and all(
isinstance(x, str) for x in this_obj
)
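The widened predicate now accepts numpy string arrays as well as lists:

```python
import numpy as np

is_a_list_of_str(["a", "b"])            # True
is_a_list_of_str(np.array(["a", "b"]))  # True; np.str_ subclasses str
is_a_list_of_str("ab")                  # False
```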
def _clean_value(value: Any) -> str:
@ -386,14 +427,19 @@ def get_logdir_name(dirname, local_dir):
return logdir
def get_trial_fold_name(local_dir, trial_config, trial_id):
global counter
counter = counter + 1
experiment_tag = "{0}_{1}".format(str(counter), format_vars(trial_config))
logdir = get_logdir_name(
_generate_dirname(experiment_tag, trial_id=trial_id), local_dir
)
return logdir
class Counter:
counter = 0
@staticmethod
def get_trial_fold_name(local_dir, trial_config, trial_id):
Counter.counter += 1
experiment_tag = "{0}_{1}".format(
str(Counter.counter), format_vars(trial_config)
)
logdir = get_logdir_name(
_generate_dirname(experiment_tag, trial_id=trial_id), local_dir
)
return logdir
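A usage sketch with hypothetical arguments; the class attribute replaces the former module-level `counter`, so callers now go through the class:

```python
# hypothetical call; each call increments Counter.counter for a unique tag
logdir = Counter.get_trial_fold_name(
    "data/output", {"learning_rate": 1e-5}, trial_id="trial_0"
)
```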
def load_model(checkpoint_path, task, num_labels, per_model_config=None):
@ -499,7 +545,7 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
def compute_checkpoint_freq(
train_data_size,
custom_hpo_args,
hf_args,
num_train_epochs,
batch_size,
):
@ -508,7 +554,7 @@ def compute_checkpoint_freq(
min(num_train_epochs, 1)
* train_data_size
/ batch_size
/ custom_hpo_args.ckpt_per_epoch
/ hf_args.ckpt_per_epoch
)
+ 1
)
@ -516,7 +562,7 @@ def compute_checkpoint_freq(
@dataclass
class HPOArgs:
class HFArgs:
"""The HPO setting.
Args:
output_dir (str): data root directory for outputting the log, etc.
@ -534,7 +580,12 @@ class HPOArgs:
model_path: str = field(
default="facebook/muppet-roberta-base",
metadata={"help": "model path model for HPO"},
metadata={"help": "model path for HPO"},
)
tokenizer_model_path: str = field(
default=None,
metadata={"help": "tokenizer model path for HPO"},
)
fp16: bool = field(default=True, metadata={"help": "whether to use the FP16 mode"})
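A sketch of setting these fields through `automl_settings["hf_args"]`, mirroring the updated tests (per the `fit()` change, `tokenizer_model_path` falls back to `model_path` when unset):

```python
# field values taken from the updated tests
automl_settings["hf_args"] = {
    "model_path": "google/electra-small-discriminator",
    "output_dir": "test/data/output/",
    "ckpt_per_epoch": 1,
    "fp16": False,
}
```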
@ -552,12 +603,17 @@ class HPOArgs:
ckpt_per_epoch: int = field(default=1, metadata={"help": "checkpoint per epoch"})
per_device_eval_batch_size: int = field(
default=1,
metadata={"help": "per gpu evaluation batch size"},
)
@staticmethod
def load_args():
from dataclasses import fields
arg_parser = argparse.ArgumentParser()
for each_field in fields(HPOArgs):
for each_field in fields(HFArgs):
arg_parser.add_argument(
"--" + each_field.name,


@ -79,7 +79,7 @@ class TrainingLogWriter(object):
sample_size,
):
if self.file is None:
raise IOError("Call open() to open the outpute file first.")
raise IOError("Call open() to open the output file first.")
if validation_loss is None:
raise ValueError("TEST LOSS NONE ERROR!!!")
record = TrainingLogRecord(
@ -109,7 +109,7 @@ class TrainingLogWriter(object):
def checkpoint(self):
if self.file is None:
raise IOError("Call open() to open the outpute file first.")
raise IOError("Call open() to open the output file first.")
if self.current_best_loss_record_id is None:
logger.warning(
"flaml.training_log: checkpoint() called before any record is written, skipped."

notebook/automl_nlp.ipynb (new file, 1522 lines): diff suppressed because one or more lines are too long


@ -4,12 +4,17 @@ from requests.exceptions import ChunkedEncodingError
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
from flaml.data import load_openml_dataset
import urllib3
try:
X_train, X_test, y_train, y_test = load_openml_dataset(
dataset_id=1169, data_dir="test/", dataset_format=dataset_format
)
except (OpenMLServerException, ChunkedEncodingError) as e:
except (
OpenMLServerException,
ChunkedEncodingError,
urllib3.exceptions.ReadTimeoutError,
) as e:
print(e)
return
""" import AutoML class from flaml package """


@ -1,7 +1,7 @@
def test_load_args_sub():
from flaml.nlp.utils import HPOArgs
from flaml.nlp.utils import HFArgs
HPOArgs.load_args()
HFArgs.load_args()
if __name__ == "__main__":


@ -84,9 +84,10 @@ def test_hf_data():
"task": "seq-classification",
"metric": "accuracy",
"log_file_name": "seqclass.log",
"use_ray": False,
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,
@ -116,7 +117,6 @@ def test_hf_data():
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
with open("automl.pkl", "rb") as f:
automl = pickle.load(f)
shutil.rmtree("test/data/output/")
automl.predict(X_test)
automl.predict(["test test", "test test"])
automl.predict(
@ -164,7 +164,7 @@ def _test_custom_data():
"metric": "accuracy",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"ckpt_per_epoch": 1,
@ -183,6 +183,16 @@ def _test_custom_data():
]
)
import pickle
automl.pickle("automl.pkl")
with open("automl.pkl", "rb") as f:
automl = pickle.load(f)
config = automl.best_config.copy()
config["learner"] = automl.best_estimator
automl.trainable(config)
if __name__ == "__main__":
test_hf_data()


@ -52,7 +52,7 @@ def test_classification_head():
"metric": "accuracy",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,


@ -19,8 +19,7 @@ def custom_metric(
from flaml.model import TransformersEstimator
if estimator._trainer is None:
estimator._init_model_for_predict(X_test)
trainer = estimator._trainer
trainer, _, _ = estimator._init_model_for_predict(X_test)
estimator._trainer = None
else:
trainer = estimator._trainer
@ -103,7 +102,7 @@ def test_custom_metric():
"log_file_name": "seqclass.log",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"ckpt_per_epoch": 1,


@ -43,7 +43,7 @@ def test_cv():
"n_splits": 3,
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,


@ -216,7 +216,7 @@ def test_mcc():
"log_file_name": "seqclass.log",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,


@ -6,6 +6,9 @@ import pytest
def test_regression():
try:
import ray
if not ray.is_initialized():
ray.init()
except ImportError:
return
from flaml import AutoML
@ -65,10 +68,10 @@ def test_regression():
"task": "seq-regression",
"metric": "pearsonr",
"starting_points": {"transformer": {"num_train_epochs": 1}},
"use_ray": True,
"use_ray": {"local_dir": "data/outut/"},
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,
@ -77,6 +80,7 @@ def test_regression():
ray.shutdown()
ray.init()
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)


@ -58,7 +58,7 @@ def test_summarization():
"log_file_name": "seqclass.log",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "patrickvonplaten/t5-tiny-random",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,


@ -726,7 +726,7 @@ def test_tokenclassification():
"metric": "seqeval",
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "bert-base-uncased",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,


@ -81,7 +81,7 @@ def _test_hf_data():
"use_ray": True,
}
automl_settings["custom_hpo_args"] = {
automl_settings["hf_args"] = {
"model_path": "facebook/muppet-roberta-base",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,


@ -26,8 +26,8 @@ automl = AutoML()
automl_settings = {
"time_budget": 100,
"task": "seq-classification",
"custom_hpo_args": {"output_dir": "data/output/"},
"gpu_per_trial": 1, # set to 0 if no GPU is available
"hf_args": {"output_dir": "data/output/"}, # setting the huggingface arguments: output directory
"gpu_per_trial": 1, # set to 0 if no GPU is available
}
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
automl.predict(X_test)
@ -77,11 +77,11 @@ automl_settings = {
"task": "seq-regression",
"metric": "rmse",
}
automl_settings["custom_hpo_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"ckpt_per_epoch": 5,
"fp16": False,
automl_settings["hf_args"] = { # setting the huggingface arguments
"model_path": "google/electra-small-discriminator", # setting the language model
"output_dir": "data/output/", # setting the output directory
"ckpt_per_epoch": 5, # setting the number of checkpoints per epoch
"fp16": False, # setting whether to use FP16
}
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
@ -127,11 +127,11 @@ automl_settings = {
"task": "summarization",
"metric": "rouge1",
}
automl_settings["custom_hpo_args"] = {
"model_path": "t5-small",
"output_dir": "data/output/",
"ckpt_per_epoch": 5,
"fp16": False,
automl_settings["hf_args"] = { # setting the huggingface arguments
"model_path": "t5-small", # setting the language model
"output_dir": "data/output/", # setting the output directory
"ckpt_per_epoch": 5, # setting the number of checkpoints per epoch
"fp16": False, # setting whether to use FP16
}
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
@ -205,4 +205,10 @@ Model config T5Config {
}
```
For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
### Link to Jupyter notebook
To run these examples in a Jupyter notebook, see:
[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_nlp.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_nlp.ipynb)