mirror of https://github.com/microsoft/autogen.git
fixing bug for ner (#463)
* fixing bug for ner * removing global var * adding class for trial counter * adding notebook * adding use_ray dict * updating documentation for nlp
This commit is contained in:
parent
5f688c1662
commit
af423463c3
221
flaml/automl.py
221
flaml/automl.py
|
@ -246,6 +246,7 @@ class AutoMLState:
|
|||
* sample_size
|
||||
/ state.data_size[0]
|
||||
)
|
||||
# raise Exception("bbbbb", state.time_budget, budget)
|
||||
|
||||
if _is_nlp_task(state.task):
|
||||
state.fit_kwargs["X_val"] = state.X_val
|
||||
|
@ -326,80 +327,29 @@ class AutoMLState:
|
|||
weight = None
|
||||
if groups is not None:
|
||||
self.fit_kwargs["groups"] = groups
|
||||
|
||||
budget = (
|
||||
None
|
||||
if self.time_budget is None
|
||||
else self.time_budget - self.time_from_start
|
||||
)
|
||||
if (
|
||||
hasattr(self, "resources_per_trial")
|
||||
and self.resources_per_trial.get("gpu", 0) > 0
|
||||
):
|
||||
|
||||
if _is_nlp_task(self.task):
|
||||
use_ray = self.fit_kwargs.get("use_ray")
|
||||
self.fit_kwargs["use_ray"] = True
|
||||
estimator, train_time = train_estimator(
|
||||
X_train=sampled_X_train,
|
||||
y_train=sampled_y_train,
|
||||
config_dic=config,
|
||||
task=self.task,
|
||||
estimator_name=estimator,
|
||||
n_jobs=self.n_jobs,
|
||||
estimator_class=self.learner_classes.get(estimator),
|
||||
budget=budget,
|
||||
fit_kwargs=self.fit_kwargs,
|
||||
eval_metric="train_time",
|
||||
)
|
||||
|
||||
def _trainable_function_wrapper(config: dict):
|
||||
|
||||
return_estimator, train_time = train_estimator(
|
||||
X_train=sampled_X_train,
|
||||
y_train=sampled_y_train,
|
||||
config_dic=config,
|
||||
task=self.task,
|
||||
estimator_name=estimator,
|
||||
n_jobs=self.n_jobs,
|
||||
estimator_class=self.learner_classes.get(estimator),
|
||||
budget=budget,
|
||||
fit_kwargs=self.fit_kwargs,
|
||||
)
|
||||
return {"estimator": return_estimator, "train_time": train_time}
|
||||
|
||||
if estimator not in self.learner_classes:
|
||||
self.learner_classes[estimator] = get_estimator_class(
|
||||
self.task, estimator
|
||||
)
|
||||
|
||||
analysis = tune.run(
|
||||
_trainable_function_wrapper,
|
||||
config=config_w_resource,
|
||||
metric="train_time",
|
||||
mode="min",
|
||||
resources_per_trial=self.resources_per_trial,
|
||||
num_samples=1,
|
||||
use_ray=True,
|
||||
)
|
||||
result = list(analysis.results.values())[0]
|
||||
estimator, train_time = result["estimator"], result["train_time"]
|
||||
|
||||
if _is_nlp_task(self.task):
|
||||
if use_ray is None:
|
||||
del self.fit_kwargs["use_ray"]
|
||||
else:
|
||||
self.fit_kwargs["use_ray"] = use_ray
|
||||
estimator.use_ray = False
|
||||
else:
|
||||
if _is_nlp_task(self.task):
|
||||
use_ray = self.fit_kwargs.get("use_ray")
|
||||
self.fit_kwargs["use_ray"] = False
|
||||
estimator, train_time = train_estimator(
|
||||
X_train=sampled_X_train,
|
||||
y_train=sampled_y_train,
|
||||
config_dic=config,
|
||||
task=self.task,
|
||||
estimator_name=estimator,
|
||||
n_jobs=self.n_jobs,
|
||||
estimator_class=self.learner_classes.get(estimator),
|
||||
budget=budget,
|
||||
fit_kwargs=self.fit_kwargs,
|
||||
)
|
||||
if _is_nlp_task(self.task):
|
||||
if use_ray is None:
|
||||
del self.fit_kwargs["use_ray"]
|
||||
else:
|
||||
self.fit_kwargs["use_ray"] = use_ray
|
||||
if sampled_weight is not None:
|
||||
self.fit_kwargs["sample_weight"] = weight
|
||||
|
||||
return estimator, train_time
|
||||
|
||||
|
||||
|
@ -749,7 +699,11 @@ class AutoML(BaseEstimator):
|
|||
"""Time taken to find best model in seconds."""
|
||||
return self.__dict__.get("_time_taken_best_iter")
|
||||
|
||||
def predict(self, X: Union[np.array, pd.DataFrame, List[str], List[List[str]]]):
|
||||
def predict(
|
||||
self,
|
||||
X: Union[np.array, pd.DataFrame, List[str], List[List[str]]],
|
||||
**pred_kwargs,
|
||||
):
|
||||
"""Predict label from features.
|
||||
|
||||
Args:
|
||||
|
@ -761,6 +715,8 @@ class AutoML(BaseEstimator):
|
|||
arima or sarimax). Other columns in the dataframe
|
||||
are assumed to be exogenous variables (categorical
|
||||
or numeric).
|
||||
**pred_kwargs: Other keyword arguments to pass to the predict() function of
|
||||
the searched learners, such as per_device_eval_batch_size.
|
||||
|
||||
```python
|
||||
multivariate_X_test = pd.DataFrame({
|
||||
|
@ -782,7 +738,7 @@ class AutoML(BaseEstimator):
|
|||
)
|
||||
return None
|
||||
X = self._preprocess(X)
|
||||
y_pred = estimator.predict(X)
|
||||
y_pred = estimator.predict(X, **pred_kwargs)
|
||||
if (
|
||||
isinstance(y_pred, np.ndarray)
|
||||
and y_pred.ndim > 1
|
||||
|
@ -796,12 +752,14 @@ class AutoML(BaseEstimator):
|
|||
else:
|
||||
return y_pred
|
||||
|
||||
def predict_proba(self, X):
|
||||
def predict_proba(self, X, **pred_kwargs):
|
||||
"""Predict the probability of each class from features, only works for
|
||||
classification problems.
|
||||
|
||||
Args:
|
||||
X: A numpy array of featurized instances, shape n * m.
|
||||
**pred_kwargs: Other keyword arguments to pass to the predict_proba() function of
|
||||
the searched learners, such as per_device_eval_batch_size.
|
||||
|
||||
Returns:
|
||||
A numpy array of shape n * c. c is the # classes. Each element at
|
||||
|
@ -814,7 +772,7 @@ class AutoML(BaseEstimator):
|
|||
)
|
||||
return None
|
||||
X = self._preprocess(X)
|
||||
proba = self._trained_estimator.predict_proba(X)
|
||||
proba = self._trained_estimator.predict_proba(X, **pred_kwargs)
|
||||
return proba
|
||||
|
||||
def _preprocess(self, X):
|
||||
|
@ -1319,6 +1277,7 @@ class AutoML(BaseEstimator):
|
|||
task=task,
|
||||
estimator_name=estimator,
|
||||
estimator_class=self._state.learner_classes.get(estimator),
|
||||
eval_metric="train_time",
|
||||
)
|
||||
return estimator
|
||||
|
||||
|
@ -1680,6 +1639,17 @@ class AutoML(BaseEstimator):
|
|||
"""
|
||||
return self._state.data_size[0] if self._sample else None
|
||||
|
||||
def pickle(self, output_file_name):
    """Serialize this AutoML instance to a file with the pickle module.

    Every search state's ``training_function`` attribute is detached
    before dumping (presumably because those callables cannot be pickled
    -- confirm against where they are created) and re-attached afterwards,
    so the in-memory object is left unchanged whether or not the dump
    succeeds.

    Args:
        output_file_name: Path of the file to write; it is opened in
            binary write mode and overwritten.

    Raises:
        KeyError: If an estimator in ``self.estimator_list`` has no entry
            in ``self._search_states``.
        OSError: If the output file cannot be opened or written.
        pickle.PicklingError: If some other part of the object is not
            picklable.
    """
    import pickle

    estimator_to_training_function = {}
    try:
        for estimator in self.estimator_list:
            search_state = self._search_states[estimator]
            # States that never received a training function are left alone.
            if hasattr(search_state, "training_function"):
                estimator_to_training_function[
                    estimator
                ] = search_state.training_function
                del search_state.training_function
        with open(output_file_name, "wb") as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
    finally:
        # Re-attach what was detached, even when detaching or dumping failed,
        # so this instance stays usable after the call.
        for estimator, training_function in estimator_to_training_function.items():
            self._search_states[estimator].training_function = training_function
|
||||
|
||||
@property
|
||||
def trainable(self) -> Callable[[dict], Optional[float]]:
|
||||
"""Training function.
|
||||
|
@ -1960,10 +1930,10 @@ class AutoML(BaseEstimator):
|
|||
augment rare classes.
|
||||
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
|
||||
size when sample=True.
|
||||
use_ray: boolean, default=False | Whether to use ray to run the training
|
||||
use_ray: boolean or dict
|
||||
If boolean: default=False | Whether to use ray to run the training
|
||||
in separate processes. This can be used to prevent OOM for large
|
||||
datasets, but will incur more overhead in time. Only use it if
|
||||
you run into OOM failures.
|
||||
datasets, but will incur more overhead in time.
|
||||
metric_constraints: list, default=[] | The list of metric constraints.
|
||||
Each element in this list is a 3-tuple, which shall be expressed
|
||||
in the following format: the first element of the 3-tuple is the name of the
|
||||
|
@ -2064,14 +2034,21 @@ class AutoML(BaseEstimator):
|
|||
import ray
|
||||
|
||||
n_cpus = use_ray and ray.available_resources()["CPU"] or os.cpu_count()
|
||||
|
||||
self._state.resources_per_trial = (
|
||||
# when using gpu, default cpu is 1 per job; otherwise, default cpu is n_cpus / n_concurrent_trials
|
||||
{"cpu": max(int(n_cpus / n_concurrent_trials), 1), "gpu": gpu_per_trial}
|
||||
if gpu_per_trial == 0
|
||||
else {"cpu": 1, "gpu": gpu_per_trial}
|
||||
(
|
||||
{
|
||||
"cpu": max(int((n_cpus - 2) / 2 / n_concurrent_trials), 1),
|
||||
"gpu": gpu_per_trial,
|
||||
}
|
||||
if gpu_per_trial == 0
|
||||
else {"cpu": 1, "gpu": gpu_per_trial}
|
||||
)
|
||||
if n_jobs < 0
|
||||
else {"cpu": n_jobs, "gpu": gpu_per_trial}
|
||||
)
|
||||
|
||||
if isinstance(X_train, ray.ObjectRef):
|
||||
X_train = ray.get(X_train)
|
||||
elif isinstance(dataframe, ray.ObjectRef):
|
||||
|
@ -2131,7 +2108,11 @@ class AutoML(BaseEstimator):
|
|||
)
|
||||
)
|
||||
if "auto" == metric:
|
||||
if "binary" in self._state.task:
|
||||
if _is_nlp_task(self._state.task):
|
||||
from .nlp.utils import load_default_huggingface_metric_for_task
|
||||
|
||||
metric = load_default_huggingface_metric_for_task(self._state.task)
|
||||
elif "binary" in self._state.task:
|
||||
metric = "roc_auc"
|
||||
elif "multi" in self._state.task:
|
||||
metric = "log_loss"
|
||||
|
@ -2139,17 +2120,9 @@ class AutoML(BaseEstimator):
|
|||
metric = "mape"
|
||||
elif self._state.task == "rank":
|
||||
metric = "ndcg"
|
||||
elif _is_nlp_task(self._state.task):
|
||||
from .nlp.utils import load_default_huggingface_metric_for_task
|
||||
|
||||
metric = load_default_huggingface_metric_for_task(self._state.task)
|
||||
else:
|
||||
metric = "r2"
|
||||
|
||||
if _is_nlp_task(self._state.task):
|
||||
self._state.fit_kwargs["metric"] = metric
|
||||
self._state.fit_kwargs["use_ray"] = self._use_ray
|
||||
|
||||
self._state.metric = metric
|
||||
|
||||
def is_to_reverse_metric(metric, task):
|
||||
|
@ -2355,6 +2328,14 @@ class AutoML(BaseEstimator):
|
|||
elif "random" == self._hpo_method:
|
||||
from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
|
||||
from ray.tune.sample import Domain
|
||||
elif "optuna" == self._hpo_method:
|
||||
try:
|
||||
from ray import __version__ as ray_version
|
||||
|
||||
assert ray_version >= "1.0.0"
|
||||
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
|
||||
except (ImportError, AssertionError):
|
||||
from .searcher.suggestion import OptunaSearch as SearchAlgo
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f"hpo_method={self._hpo_method} is not recognized. "
|
||||
|
@ -2382,24 +2363,48 @@ class AutoML(BaseEstimator):
|
|||
else:
|
||||
self._state.time_from_start = time.time() - self._start_time_flag
|
||||
time_left = self._state.time_budget - self._state.time_from_start
|
||||
search_alg = SearchAlgo(
|
||||
metric="val_loss",
|
||||
space=space,
|
||||
low_cost_partial_config=self.low_cost_partial_config,
|
||||
points_to_evaluate=self.points_to_evaluate,
|
||||
cat_hp_cost=self.cat_hp_cost,
|
||||
resource_attr=self.resource_attr,
|
||||
min_resource=self.min_resource,
|
||||
max_resource=self.max_resource,
|
||||
config_constraints=[
|
||||
(partial(size, self._state), "<=", self._mem_thres)
|
||||
],
|
||||
metric_constraints=self.metric_constraints,
|
||||
seed=self._seed,
|
||||
time_budget_s=time_left,
|
||||
)
|
||||
if self._hpo_method != "optuna":
|
||||
search_alg = SearchAlgo(
|
||||
metric="val_loss",
|
||||
space=space,
|
||||
low_cost_partial_config=self.low_cost_partial_config,
|
||||
points_to_evaluate=self.points_to_evaluate,
|
||||
cat_hp_cost=self.cat_hp_cost,
|
||||
resource_attr=self.resource_attr,
|
||||
min_resource=self.min_resource,
|
||||
max_resource=self.max_resource,
|
||||
config_constraints=[
|
||||
(partial(size, self._state), "<=", self._mem_thres)
|
||||
],
|
||||
metric_constraints=self.metric_constraints,
|
||||
seed=self._seed,
|
||||
time_budget_s=time_left,
|
||||
)
|
||||
else:
|
||||
# if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match
|
||||
# need to remove the extra keys from the search space to be consistent with the initial config
|
||||
converted_space = SearchAlgo.convert_search_space(space)
|
||||
|
||||
removed_keys = set(space.keys()).difference(converted_space.keys())
|
||||
new_points_to_evaluate = []
|
||||
for idx in range(len(self.points_to_evaluate)):
|
||||
r = self.points_to_evaluate[idx].copy()
|
||||
for each_key in removed_keys:
|
||||
r.pop(each_key)
|
||||
new_points_to_evaluate.append(r)
|
||||
|
||||
search_alg = SearchAlgo(
|
||||
metric="val_loss",
|
||||
mode="min",
|
||||
points_to_evaluate=[
|
||||
p
|
||||
for p in new_points_to_evaluate
|
||||
if len(p) == len(converted_space)
|
||||
],
|
||||
)
|
||||
search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
|
||||
resources_per_trial = self._state.resources_per_trial
|
||||
|
||||
analysis = ray.tune.run(
|
||||
self.trainable,
|
||||
search_alg=search_alg,
|
||||
|
@ -2413,6 +2418,7 @@ class AutoML(BaseEstimator):
|
|||
raise_on_failed_trial=False,
|
||||
keep_checkpoints_num=1,
|
||||
checkpoint_score_attr="min-val_loss",
|
||||
**self._use_ray if isinstance(self._use_ray, dict) else {},
|
||||
)
|
||||
# logger.info([trial.last_result for trial in analysis.trials])
|
||||
trials = sorted(
|
||||
|
@ -2579,6 +2585,7 @@ class AutoML(BaseEstimator):
|
|||
if isinstance(search_state.init_config, list)
|
||||
else [search_state.init_config]
|
||||
)
|
||||
|
||||
low_cost_partial_config = search_state.low_cost_partial_config
|
||||
if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"):
|
||||
algo = SearchAlgo(
|
||||
|
@ -2598,6 +2605,20 @@ class AutoML(BaseEstimator):
|
|||
seed=self._seed,
|
||||
)
|
||||
else:
|
||||
# if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match
|
||||
# need to remove the extra keys from the search space to be consistent with the initial config
|
||||
converted_space = SearchAlgo.convert_search_space(search_space)
|
||||
removed_keys = set(search_space.keys()).difference(
|
||||
converted_space.keys()
|
||||
)
|
||||
new_points_to_evaluate = []
|
||||
for idx in range(len(points_to_evaluate)):
|
||||
r = points_to_evaluate[idx].copy()
|
||||
for each_key in removed_keys:
|
||||
r.pop(each_key)
|
||||
new_points_to_evaluate.append(r)
|
||||
points_to_evaluate = new_points_to_evaluate
|
||||
|
||||
algo = SearchAlgo(
|
||||
metric="val_loss",
|
||||
mode="min",
|
||||
|
|
|
@ -397,6 +397,7 @@ def get_val_loss(
|
|||
# fit_kwargs['groups_val'] = groups_val
|
||||
# fit_kwargs['X_val'] = X_val
|
||||
# fit_kwargs['y_val'] = y_val
|
||||
|
||||
estimator.fit(X_train, y_train, budget, **fit_kwargs)
|
||||
val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
|
||||
config,
|
||||
|
@ -561,6 +562,10 @@ def compute_estimator(
|
|||
task=task,
|
||||
n_jobs=n_jobs,
|
||||
)
|
||||
|
||||
if isinstance(estimator, TransformersEstimator):
|
||||
fit_kwargs["metric"] = eval_metric
|
||||
|
||||
if "holdout" == eval_method:
|
||||
val_loss, metric_for_logging, train_time, pred_time = get_val_loss(
|
||||
config_dic,
|
||||
|
@ -604,6 +609,7 @@ def train_estimator(
|
|||
estimator_class=None,
|
||||
budget=None,
|
||||
fit_kwargs={},
|
||||
eval_metric=None,
|
||||
):
|
||||
start_time = time.time()
|
||||
estimator_class = estimator_class or get_estimator_class(task, estimator_name)
|
||||
|
@ -612,6 +618,9 @@ def train_estimator(
|
|||
task=task,
|
||||
n_jobs=n_jobs,
|
||||
)
|
||||
if isinstance(estimator, TransformersEstimator):
|
||||
fit_kwargs["metric"] = eval_metric
|
||||
|
||||
if X_train is not None:
|
||||
train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
|
||||
else:
|
||||
|
|
291
flaml/model.py
291
flaml/model.py
|
@ -197,7 +197,7 @@ class BaseEstimator:
|
|||
train_time = self._fit(X_train, y_train, **kwargs)
|
||||
return train_time
|
||||
|
||||
def predict(self, X):
|
||||
def predict(self, X, **kwargs):
|
||||
"""Predict label from features.
|
||||
|
||||
Args:
|
||||
|
@ -216,7 +216,7 @@ class BaseEstimator:
|
|||
)
|
||||
return np.ones(X.shape[0])
|
||||
|
||||
def predict_proba(self, X):
|
||||
def predict_proba(self, X, **kwargs):
|
||||
"""Predict the probability of each class from features.
|
||||
|
||||
Only works for classification problems
|
||||
|
@ -325,7 +325,7 @@ class TransformersEstimator(BaseEstimator):
|
|||
},
|
||||
"num_train_epochs": {
|
||||
"domain": tune.loguniform(lower=0.1, upper=10.0),
|
||||
"init_value": 3,
|
||||
"init_value": 1,
|
||||
},
|
||||
"per_device_train_batch_size": {
|
||||
"domain": tune.choice([4, 8, 16, 32]),
|
||||
|
@ -344,33 +344,38 @@ class TransformersEstimator(BaseEstimator):
|
|||
"init_value": 1e-6,
|
||||
},
|
||||
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
|
||||
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
|
||||
"global_max_steps": {
|
||||
"domain": sys.maxsize,
|
||||
"init_value": sys.maxsize,
|
||||
},
|
||||
}
|
||||
|
||||
if task in NLG_TASKS:
|
||||
search_space_dict["generation_num_beams"] = {
|
||||
"domain": tune.randint(2, 5),
|
||||
"init_value": 3,
|
||||
}
|
||||
search_space_dict["generation_max_length"] = {
|
||||
"domain": tune.choice([16, 32, 64, 128]),
|
||||
"init_value": 64,
|
||||
}
|
||||
|
||||
return search_space_dict
|
||||
|
||||
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
|
||||
from .nlp.utils import HPOArgs
|
||||
def _init_hf_args(self, automl_fit_kwargs: dict = None):
|
||||
from .nlp.utils import HFArgs
|
||||
|
||||
custom_hpo_args = HPOArgs()
|
||||
for key, val in automl_fit_kwargs["custom_hpo_args"].items():
|
||||
hf_args = HFArgs()
|
||||
for key, val in automl_fit_kwargs["hf_args"].items():
|
||||
assert (
|
||||
key in custom_hpo_args.__dict__
|
||||
), "The specified key {} is not in the argument list of flaml.nlp.utils::HPOArgs".format(
|
||||
key in hf_args.__dict__
|
||||
), "The specified key {} is not in the argument list of flaml.nlp.utils::HFArgs".format(
|
||||
key
|
||||
)
|
||||
setattr(custom_hpo_args, key, val)
|
||||
self.custom_hpo_args = custom_hpo_args
|
||||
setattr(hf_args, key, val)
|
||||
self.hf_args = hf_args
|
||||
|
||||
def _update_hf_args(self, automl_pred_kwargs: dict = None):
    """Apply prediction-time overrides to ``self.hf_args``.

    Only the ``"hf_args"`` entry of ``automl_pred_kwargs`` is read. Each of
    its keys must already be an attribute of ``self.hf_args``. All keys are
    validated before any value is assigned, so a rejected call leaves
    ``self.hf_args`` untouched.

    Args:
        automl_pred_kwargs: Keyword arguments received by predict() /
            predict_proba(). ``None``, an empty dict, or a missing or empty
            ``"hf_args"`` entry makes this a no-op.

    Raises:
        AssertionError: If a key under ``"hf_args"`` is not an existing
            attribute of ``self.hf_args``.
    """
    if not automl_pred_kwargs:
        return
    hf_args = automl_pred_kwargs.get("hf_args")
    if not hf_args:
        return

    known_args = self.hf_args.__dict__
    for key in hf_args:
        if key not in known_args:
            # Raised explicitly rather than via ``assert`` so the check still
            # runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The specified key {} is not in the argument list of flaml.nlp.utils::HFArgs".format(
                    key
                )
            )

    for key, val in hf_args.items():
        setattr(self.hf_args, key, val)
|
||||
|
||||
def _preprocess(self, X, y=None, **kwargs):
|
||||
from .nlp.utils import tokenize_text, is_a_list_of_str
|
||||
|
@ -383,7 +388,7 @@ class TransformersEstimator(BaseEstimator):
|
|||
X=X,
|
||||
Y=y,
|
||||
task=self._task,
|
||||
custom_hpo_args=self.custom_hpo_args,
|
||||
hf_args=self.hf_args,
|
||||
tokenizer=self._tokenizer,
|
||||
)
|
||||
else:
|
||||
|
@ -392,12 +397,63 @@ class TransformersEstimator(BaseEstimator):
|
|||
def _model_init(self, num_labels, per_model_config):
|
||||
from .nlp.utils import load_model
|
||||
|
||||
return load_model(
|
||||
checkpoint_path=self.custom_hpo_args.model_path,
|
||||
this_model = load_model(
|
||||
checkpoint_path=self.hf_args.model_path,
|
||||
task=self._task,
|
||||
num_labels=num_labels,
|
||||
per_model_config=per_model_config,
|
||||
)
|
||||
return this_model
|
||||
|
||||
def _get_training_args(self, local_rank=-1):
    """Build the ``self._TrainingArguments`` instance for this trial.

    Reads ``self._task``, ``self._trial_dir``, ``self._ckpt_freq``,
    ``self.hf_args``, ``self._kwargs`` and ``self._training_args_config``,
    which must be set before this is called.

    Side effect: for tasks in ``NLG_TASKS`` the key
    ``"predict_with_generate"`` is written into
    ``self._training_args_config``.

    Args:
        local_rank: Forwarded unchanged as the ``local_rank`` argument.

    Returns:
        The constructed ``self._TrainingArguments`` object.

    Raises:
        TypeError: If ``self._training_args_config`` repeats one of the
            keyword arguments set explicitly here.
    """
    import transformers

    if self._task in NLG_TASKS:
        self._training_args_config["predict_with_generate"] = True

    # "gpu_per_trial" may be absent from the fit kwargs; a missing value must
    # not crash the ``> 0`` comparison. It disables fp16 (as 0 does) but,
    # unlike an explicit 0, does not force ``no_cuda``.
    gpu_per_trial = self._kwargs.get("gpu_per_trial")
    use_gpu = (gpu_per_trial or 0) > 0

    shared_args = dict(
        report_to=[],
        output_dir=self._trial_dir,
        do_train=True,
        do_eval=True,
        # evaluate, log and checkpoint at the same step interval
        eval_steps=self._ckpt_freq,
        save_steps=self._ckpt_freq,
        logging_steps=self._ckpt_freq,
        save_total_limit=0,
        metric_for_best_model="loss",
        fp16=self.hf_args.fp16 if use_gpu else False,
        no_cuda=bool(gpu_per_trial == 0),
        local_rank=local_rank,
        per_device_eval_batch_size=self.hf_args.per_device_eval_batch_size,
    )

    # The only version-dependent argument is how periodic evaluation is
    # switched on.
    if transformers.__version__.startswith("3"):
        shared_args["evaluate_during_training"] = True
    else:
        from transformers import IntervalStrategy

        shared_args["evaluation_strategy"] = IntervalStrategy.STEPS

    # Two separate ``**`` unpackings keep the original behaviour of raising
    # TypeError on a duplicated keyword instead of silently overriding it.
    return self._TrainingArguments(**shared_args, **self._training_args_config)
|
||||
|
||||
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
|
||||
import transformers
|
||||
|
@ -411,18 +467,11 @@ class TransformersEstimator(BaseEstimator):
|
|||
from .nlp.utils import (
|
||||
get_num_labels,
|
||||
separate_config,
|
||||
load_model,
|
||||
compute_checkpoint_freq,
|
||||
get_trial_fold_name,
|
||||
Counter,
|
||||
date_str,
|
||||
)
|
||||
|
||||
# TODO: if self._task == QUESTIONANSWERING, uncomment the code below (add indentation before
|
||||
# from .nlp.huggingface.trainer import TrainerForAuto)
|
||||
|
||||
# if self._task in NLG_TASKS:
|
||||
# from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
|
||||
# else:
|
||||
from .nlp.huggingface.trainer import TrainerForAuto
|
||||
from .nlp.huggingface.data_collator import DataCollatorForAuto
|
||||
from .nlp.utils import get_auto_tokenizer
|
||||
|
@ -462,13 +511,22 @@ class TransformersEstimator(BaseEstimator):
|
|||
|
||||
set_seed(self.params.get("seed", self._TrainingArguments.seed))
|
||||
|
||||
self._init_hpo_args(kwargs)
|
||||
self._init_hf_args(kwargs)
|
||||
self._tokenizer = get_auto_tokenizer(
|
||||
self.custom_hpo_args.model_path, self._task
|
||||
self.hf_args.tokenizer_model_path
|
||||
if self.hf_args.tokenizer_model_path
|
||||
else self.hf_args.model_path,
|
||||
self._task,
|
||||
)
|
||||
|
||||
self._metric = kwargs["metric"]
|
||||
self.use_ray = kwargs.get("use_ray")
|
||||
|
||||
try:
|
||||
from ray.tune import is_session_enabled
|
||||
|
||||
self.use_ray = is_session_enabled()
|
||||
except ImportError:
|
||||
self.use_ray = False
|
||||
|
||||
X_val = kwargs.get("X_val")
|
||||
y_val = kwargs.get("y_val")
|
||||
|
@ -498,70 +556,41 @@ class TransformersEstimator(BaseEstimator):
|
|||
eval_dataset = None
|
||||
|
||||
num_labels = get_num_labels(self._task, self._y_train)
|
||||
training_args_config, per_model_config = separate_config(
|
||||
self._training_args_config, self._per_model_config = separate_config(
|
||||
self.params, self._task
|
||||
)
|
||||
ckpt_freq = compute_checkpoint_freq(
|
||||
self._ckpt_freq = compute_checkpoint_freq(
|
||||
train_data_size=len(self._X_train),
|
||||
custom_hpo_args=self.custom_hpo_args,
|
||||
num_train_epochs=training_args_config.get(
|
||||
hf_args=self.hf_args,
|
||||
num_train_epochs=self._training_args_config.get(
|
||||
"num_train_epochs", self._TrainingArguments.num_train_epochs
|
||||
),
|
||||
batch_size=training_args_config.get(
|
||||
batch_size=self._training_args_config.get(
|
||||
"per_device_train_batch_size",
|
||||
self._TrainingArguments.per_device_train_batch_size,
|
||||
),
|
||||
)
|
||||
|
||||
local_dir = os.path.join(
|
||||
self.custom_hpo_args.output_dir, "train_{}".format(date_str())
|
||||
)
|
||||
local_dir = os.path.join(self.hf_args.output_dir, "train_{}".format(date_str()))
|
||||
|
||||
if not self.use_ray:
|
||||
# if self.params = {}, don't include configuration in trial fold name
|
||||
trial_dir = get_trial_fold_name(local_dir, self.params, self.trial_id)
|
||||
else:
|
||||
if self.use_ray is True:
|
||||
import ray
|
||||
|
||||
trial_dir = ray.tune.get_trial_dir()
|
||||
|
||||
if transformers.__version__.startswith("3"):
|
||||
training_args = self._TrainingArguments(
|
||||
report_to=[],
|
||||
output_dir=trial_dir,
|
||||
do_train=True,
|
||||
do_eval=True,
|
||||
eval_steps=ckpt_freq,
|
||||
evaluate_during_training=True,
|
||||
save_steps=ckpt_freq,
|
||||
logging_steps=ckpt_freq,
|
||||
save_total_limit=0,
|
||||
metric_for_best_model="loss",
|
||||
fp16=self.custom_hpo_args.fp16,
|
||||
**training_args_config,
|
||||
)
|
||||
self._trial_dir = ray.tune.get_trial_dir()
|
||||
else:
|
||||
from transformers import IntervalStrategy
|
||||
|
||||
training_args = self._TrainingArguments(
|
||||
report_to=[],
|
||||
output_dir=trial_dir,
|
||||
do_train=True,
|
||||
do_eval=True,
|
||||
per_device_eval_batch_size=1,
|
||||
eval_steps=ckpt_freq,
|
||||
logging_steps=ckpt_freq,
|
||||
evaluation_strategy=IntervalStrategy.STEPS,
|
||||
save_steps=ckpt_freq,
|
||||
save_total_limit=0,
|
||||
metric_for_best_model="loss",
|
||||
fp16=self.custom_hpo_args.fp16,
|
||||
**training_args_config,
|
||||
# if self.params = {}, don't include configuration in trial fold name
|
||||
self._trial_dir = Counter.get_trial_fold_name(
|
||||
local_dir, self.params, self.trial_id
|
||||
)
|
||||
|
||||
self._kwargs = kwargs
|
||||
self._num_labels = num_labels
|
||||
|
||||
training_args = self._get_training_args(local_rank=-1)
|
||||
|
||||
self._trainer = TrainerForAuto(
|
||||
args=training_args,
|
||||
model_init=partial(self._model_init, num_labels, per_model_config),
|
||||
model_init=partial(self._model_init, num_labels, self._per_model_config),
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=self._tokenizer,
|
||||
|
@ -575,28 +604,33 @@ class TransformersEstimator(BaseEstimator):
|
|||
callbacks=[EarlyStoppingCallbackForAuto],
|
||||
)
|
||||
|
||||
setattr(self._trainer, "_use_ray", self.use_ray)
|
||||
if self._task in NLG_TASKS:
|
||||
setattr(self._trainer, "_is_seq2seq", True)
|
||||
if kwargs.get("gpu_per_trial"):
|
||||
self._trainer.args._n_gpu = kwargs.get("gpu_per_trial")
|
||||
|
||||
gpu_per_trial = kwargs.get("gpu_per_trial", None)
|
||||
if gpu_per_trial:
|
||||
tmp_cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
|
||||
self._trainer.args._n_gpu = gpu_per_trial
|
||||
# if gpu_per_trial == 0:
|
||||
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
||||
if tmp_cuda_visible_devices.count(",") != gpu_per_trial - 1:
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
|
||||
[str(x) for x in range(gpu_per_trial)]
|
||||
)
|
||||
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
self._trainer.train()
|
||||
|
||||
if gpu_per_trial:
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = tmp_cuda_visible_devices
|
||||
|
||||
self.params[self.ITER_HP] = self._trainer.state.global_step
|
||||
|
||||
self._checkpoint_path = self._select_checkpoint(self._trainer)
|
||||
|
||||
self._kwargs = kwargs
|
||||
self._num_labels = num_labels
|
||||
self._per_model_config = per_model_config
|
||||
self._training_args_config = training_args_config
|
||||
|
||||
self._ckpt_remains = list(self._trainer.ckpt_to_metric.keys())
|
||||
self._model = load_model(
|
||||
checkpoint_path=self._checkpoint_path,
|
||||
task=self._task,
|
||||
num_labels=self._num_labels,
|
||||
per_model_config=self._per_model_config,
|
||||
)
|
||||
|
||||
if hasattr(self._trainer, "intermediate_results"):
|
||||
self.intermediate_results = [
|
||||
x[1]
|
||||
|
@ -605,6 +639,7 @@ class TransformersEstimator(BaseEstimator):
|
|||
)
|
||||
]
|
||||
self._trainer = None
|
||||
return time.time() - start_time
|
||||
|
||||
def _delete_one_ckpt(self, ckpt_location):
|
||||
if self.use_ray is False:
|
||||
|
@ -689,16 +724,21 @@ class TransformersEstimator(BaseEstimator):
|
|||
from datasets import Dataset
|
||||
from .nlp.huggingface.trainer import TrainerForAuto
|
||||
from .nlp.huggingface.data_collator import DataCollatorForPredict
|
||||
from .nlp.utils import load_model
|
||||
|
||||
X_test, _ = self._preprocess(X_test, **self._kwargs)
|
||||
test_dataset = Dataset.from_pandas(X_test)
|
||||
training_args = self._TrainingArguments(
|
||||
per_device_eval_batch_size=1,
|
||||
output_dir=self.custom_hpo_args.output_dir,
|
||||
**self._training_args_config,
|
||||
|
||||
this_model = load_model(
|
||||
checkpoint_path=self._checkpoint_path,
|
||||
task=self._task,
|
||||
num_labels=self._num_labels,
|
||||
per_model_config=self._per_model_config,
|
||||
)
|
||||
self._trainer = TrainerForAuto(
|
||||
model=self._model,
|
||||
training_args = self._get_training_args(local_rank=-1)
|
||||
|
||||
new_trainer = TrainerForAuto(
|
||||
model=this_model,
|
||||
args=training_args,
|
||||
data_collator=DataCollatorForPredict(
|
||||
tokenizer=self._tokenizer,
|
||||
|
@ -708,31 +748,36 @@ class TransformersEstimator(BaseEstimator):
|
|||
else None,
|
||||
compute_metrics=self._compute_metrics_by_dataset_name,
|
||||
)
|
||||
return test_dataset, training_args
|
||||
if self._task in NLG_TASKS:
|
||||
setattr(new_trainer, "_is_seq2seq", True)
|
||||
return new_trainer, test_dataset, training_args
|
||||
|
||||
def predict_proba(self, X):
|
||||
def predict_proba(self, X, **kwargs):
|
||||
self._update_hf_args(kwargs)
|
||||
assert (
|
||||
self._task in CLASSIFICATION
|
||||
), "predict_proba() only for classification tasks."
|
||||
|
||||
test_dataset, _ = self._init_model_for_predict(X)
|
||||
predictions = self._trainer.predict(test_dataset)
|
||||
if self.use_ray is True:
|
||||
self._trainer = None
|
||||
new_trainer, test_dataset, _ = self._init_model_for_predict(X)
|
||||
predictions = new_trainer.predict(test_dataset)
|
||||
return predictions.predictions
|
||||
|
||||
def predict(self, X):
|
||||
test_dataset, training_args = self._init_model_for_predict(X)
|
||||
def predict(self, X, **kwargs):
|
||||
import transformers
|
||||
|
||||
transformers.logging.set_verbosity_error()
|
||||
|
||||
self._update_hf_args(kwargs)
|
||||
new_trainer, test_dataset, training_args = self._init_model_for_predict(X)
|
||||
|
||||
if self._task not in NLG_TASKS:
|
||||
predictions = self._trainer.predict(test_dataset)
|
||||
predictions = new_trainer.predict(test_dataset)
|
||||
else:
|
||||
predictions = self._trainer.predict(
|
||||
predictions = new_trainer.predict(
|
||||
test_dataset,
|
||||
max_length=training_args.generation_max_length,
|
||||
num_beams=training_args.generation_num_beams,
|
||||
metric_key_prefix="predict",
|
||||
)
|
||||
if self.use_ray is True:
|
||||
self._trainer = None
|
||||
|
||||
if self._task == SEQCLASSIFICATION:
|
||||
return np.argmax(predictions.predictions, axis=1)
|
||||
elif self._task == SEQREGRESSION:
|
||||
|
@ -740,10 +785,8 @@ class TransformersEstimator(BaseEstimator):
|
|||
elif self._task == TOKENCLASSIFICATION:
|
||||
return np.argmax(predictions.predictions, axis=2)
|
||||
elif self._task == SUMMARIZATION:
|
||||
if isinstance(predictions.predictions, tuple):
|
||||
predictions = np.argmax(predictions.predictions[0], axis=2)
|
||||
decoded_preds = self._tokenizer.batch_decode(
|
||||
predictions, skip_special_tokens=True
|
||||
predictions.predictions, skip_special_tokens=True
|
||||
)
|
||||
return decoded_preds
|
||||
elif self._task == MULTICHOICECLASSIFICATION:
|
||||
|
@ -1121,7 +1164,7 @@ class XGBoostEstimator(SKLearnEstimator):
|
|||
train_time = time.time() - start_time
|
||||
return train_time
|
||||
|
||||
def predict(self, X):
|
||||
def predict(self, X, **kwargs):
|
||||
import xgboost as xgb
|
||||
|
||||
if not issparse(X):
|
||||
|
@ -1617,7 +1660,7 @@ class Prophet(SKLearnEstimator):
|
|||
self._model = model
|
||||
return train_time
|
||||
|
||||
def predict(self, X):
|
||||
def predict(self, X, **kwargs):
|
||||
if isinstance(X, int):
|
||||
raise ValueError(
|
||||
"predict() with steps is only supported for arima/sarimax."
|
||||
|
@ -1697,7 +1740,7 @@ class ARIMA(Prophet):
|
|||
self._model = model
|
||||
return train_time
|
||||
|
||||
def predict(self, X):
|
||||
def predict(self, X, **kwargs):
|
||||
if self._model is not None:
|
||||
if isinstance(X, int):
|
||||
forecast = self._model.forecast(steps=X)
|
||||
|
@ -1894,7 +1937,7 @@ class TS_SKLearn(SKLearnEstimator):
|
|||
train_time = time.time() - current_time
|
||||
return train_time
|
||||
|
||||
def predict(self, X):
|
||||
def predict(self, X, **kwargs):
|
||||
if self._model is not None:
|
||||
X = self.transform_X(X)
|
||||
X = self._preprocess(X)
|
||||
|
|
|
@ -2,6 +2,7 @@ import argparse
|
|||
from dataclasses import dataclass, field
|
||||
from itertools import chain
|
||||
from typing import Dict, Any
|
||||
import numpy as np
|
||||
|
||||
from ..data import (
|
||||
SUMMARIZATION,
|
||||
|
@ -20,61 +21,54 @@ def load_default_huggingface_metric_for_task(task):
|
|||
elif task == SEQREGRESSION:
|
||||
return "r2"
|
||||
elif task == SUMMARIZATION:
|
||||
return "rouge"
|
||||
return "rouge1"
|
||||
elif task == MULTICHOICECLASSIFICATION:
|
||||
return "accuracy"
|
||||
elif task == TOKENCLASSIFICATION:
|
||||
return "seqeval"
|
||||
|
||||
|
||||
global tokenized_column_names
|
||||
|
||||
|
||||
def get_auto_tokenizer(model_path, task):
|
||||
def get_auto_tokenizer(tokenizer_model_path, task):
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
if task == SUMMARIZATION:
|
||||
return AutoTokenizer.from_pretrained(
|
||||
model_path, # 'roberta-base'
|
||||
pretrained_model_name_or_path=tokenizer_model_path,
|
||||
cache_dir=None,
|
||||
use_fast=True,
|
||||
revision="main",
|
||||
use_auth_token=None,
|
||||
)
|
||||
else:
|
||||
return AutoTokenizer.from_pretrained(model_path, use_fast=True)
|
||||
return AutoTokenizer.from_pretrained(tokenizer_model_path, use_fast=True)
|
||||
|
||||
|
||||
def tokenize_text(X, Y=None, task=None, custom_hpo_args=None, tokenizer=None):
|
||||
def tokenize_text(X, Y=None, task=None, hf_args=None, tokenizer=None):
|
||||
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||
X_tokenized = tokenize_onedataframe(
|
||||
X,
|
||||
tokenizer=tokenizer,
|
||||
task=task,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
prefix_str="",
|
||||
)
|
||||
return X_tokenized, None
|
||||
elif task == TOKENCLASSIFICATION:
|
||||
return tokenize_text_tokclassification(
|
||||
X, Y, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args
|
||||
X, Y, tokenizer=tokenizer, hf_args=hf_args
|
||||
)
|
||||
elif task in NLG_TASKS:
|
||||
return tokenize_seq2seq(
|
||||
X, Y, tokenizer=tokenizer, task=task, custom_hpo_args=custom_hpo_args
|
||||
)
|
||||
return tokenize_seq2seq(X, Y, tokenizer=tokenizer, task=task, hf_args=hf_args)
|
||||
elif task == MULTICHOICECLASSIFICATION:
|
||||
return tokenize_text_multiplechoice(
|
||||
X, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args
|
||||
)
|
||||
return tokenize_text_multiplechoice(X, tokenizer=tokenizer, hf_args=hf_args)
|
||||
|
||||
|
||||
def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
|
||||
def tokenize_seq2seq(X, Y, tokenizer, task=None, hf_args=None):
|
||||
model_inputs = tokenize_onedataframe(
|
||||
X,
|
||||
tokenizer=tokenizer,
|
||||
task=task,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
prefix_str="summarize: ",
|
||||
)
|
||||
labels = None
|
||||
|
@ -83,7 +77,7 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
|
|||
Y.to_frame(),
|
||||
tokenizer=tokenizer,
|
||||
task=task,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
prefix_str="",
|
||||
)
|
||||
labels["label"] = [
|
||||
|
@ -97,15 +91,18 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None):
|
|||
|
||||
|
||||
def tokenize_and_align_labels(
|
||||
examples, tokenizer, custom_hpo_args=None, X_sent_key=None, Y_sent_key=None
|
||||
examples,
|
||||
tokenizer,
|
||||
hf_args=None,
|
||||
X_sent_key=None,
|
||||
Y_sent_key=None,
|
||||
return_column_name=False,
|
||||
):
|
||||
global tokenized_column_names
|
||||
|
||||
tokenized_inputs = tokenizer(
|
||||
[list(examples[X_sent_key])],
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=custom_hpo_args.max_seq_length,
|
||||
max_length=hf_args.max_seq_length,
|
||||
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
|
||||
is_split_into_words=True,
|
||||
)
|
||||
|
@ -134,27 +131,37 @@ def tokenize_and_align_labels(
|
|||
# label_ids.append(b_to_i_label[label_to_id[label[word_idx]]])
|
||||
previous_word_idx = word_idx
|
||||
tokenized_inputs["label"] = label_ids
|
||||
tokenized_column_names = sorted(tokenized_inputs.keys())
|
||||
tokenized_input_and_labels = [tokenized_inputs[x] for x in tokenized_column_names]
|
||||
for key_idx, each_key in enumerate(tokenized_column_names):
|
||||
tmp_column_names = sorted(tokenized_inputs.keys())
|
||||
tokenized_input_and_labels = [tokenized_inputs[x] for x in tmp_column_names]
|
||||
for key_idx, each_key in enumerate(tmp_column_names):
|
||||
if each_key != "label":
|
||||
tokenized_input_and_labels[key_idx] = tokenized_input_and_labels[key_idx][0]
|
||||
return tokenized_input_and_labels
|
||||
if return_column_name:
|
||||
return tokenized_input_and_labels, tmp_column_names
|
||||
else:
|
||||
return tokenized_input_and_labels
|
||||
|
||||
|
||||
def tokenize_text_tokclassification(X, Y, tokenizer, custom_hpo_args=None):
|
||||
def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None):
|
||||
import pandas as pd
|
||||
|
||||
global tokenized_column_names
|
||||
if Y is not None:
|
||||
X_and_Y = pd.concat([X, Y.to_frame()], axis=1)
|
||||
X_key = list(X.keys())[0]
|
||||
Y_key = list(Y.to_frame().keys())[0]
|
||||
_, tokenized_column_names = tokenize_and_align_labels(
|
||||
X_and_Y.iloc[0],
|
||||
tokenizer=tokenizer,
|
||||
hf_args=hf_args,
|
||||
X_sent_key=X_key,
|
||||
Y_sent_key=Y_key,
|
||||
return_column_name=True,
|
||||
)
|
||||
X_and_Y_tokenized = X_and_Y.apply(
|
||||
lambda x: tokenize_and_align_labels(
|
||||
x,
|
||||
tokenizer=tokenizer,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
X_sent_key=X_key,
|
||||
Y_sent_key=Y_key,
|
||||
),
|
||||
|
@ -170,11 +177,21 @@ def tokenize_text_tokclassification(X, Y, tokenizer, custom_hpo_args=None):
|
|||
y_tokenized = X_and_Y_tokenized.iloc[:, label_idx]
|
||||
else:
|
||||
X_key = list(X.keys())[0]
|
||||
|
||||
_, tokenized_column_names = tokenize_and_align_labels(
|
||||
X.iloc[0],
|
||||
tokenizer=tokenizer,
|
||||
hf_args=hf_args,
|
||||
X_sent_key=X_key,
|
||||
Y_sent_key=None,
|
||||
return_column_name=True,
|
||||
)
|
||||
|
||||
d = X.apply(
|
||||
lambda x: tokenize_and_align_labels(
|
||||
x,
|
||||
tokenizer=tokenizer,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
X_sent_key=X_key,
|
||||
Y_sent_key=None,
|
||||
),
|
||||
|
@ -192,28 +209,34 @@ def tokenize_onedataframe(
|
|||
X,
|
||||
tokenizer,
|
||||
task=None,
|
||||
custom_hpo_args=None,
|
||||
hf_args=None,
|
||||
prefix_str=None,
|
||||
):
|
||||
import pandas
|
||||
|
||||
global tokenized_column_names
|
||||
|
||||
with tokenizer.as_target_tokenizer():
|
||||
_, tokenized_column_names = tokenize_row(
|
||||
dict(X.iloc[0]),
|
||||
tokenizer,
|
||||
prefix=(prefix_str,) if task is SUMMARIZATION else None,
|
||||
task=task,
|
||||
hf_args=hf_args,
|
||||
return_column_name=True,
|
||||
)
|
||||
d = X.apply(
|
||||
lambda x: tokenize_row(
|
||||
x,
|
||||
tokenizer,
|
||||
prefix=(prefix_str,) if task is SUMMARIZATION else None,
|
||||
task=task,
|
||||
custom_hpo_args=custom_hpo_args,
|
||||
hf_args=hf_args,
|
||||
),
|
||||
axis=1,
|
||||
result_type="expand",
|
||||
)
|
||||
X_tokenized = pandas.DataFrame(columns=tokenized_column_names)
|
||||
X_tokenized[tokenized_column_names] = d
|
||||
return X_tokenized
|
||||
X_tokenized = pandas.DataFrame(columns=tokenized_column_names)
|
||||
X_tokenized[tokenized_column_names] = d
|
||||
return X_tokenized
|
||||
|
||||
|
||||
def postprocess_text(preds, labels):
|
||||
|
@ -230,35 +253,49 @@ def postprocess_text(preds, labels):
|
|||
return preds, labels
|
||||
|
||||
|
||||
def tokenize_row(this_row, tokenizer, prefix=None, task=None, custom_hpo_args=None):
|
||||
global tokenized_column_names
|
||||
def tokenize_row(
|
||||
this_row,
|
||||
tokenizer,
|
||||
prefix=None,
|
||||
task=None,
|
||||
hf_args=None,
|
||||
return_column_name=False,
|
||||
):
|
||||
assert (
|
||||
"max_seq_length" in custom_hpo_args.__dict__
|
||||
"max_seq_length" in hf_args.__dict__
|
||||
), "max_seq_length must be provided for glue"
|
||||
|
||||
if prefix:
|
||||
this_row = tuple(["".join(x) for x in zip(prefix, this_row)])
|
||||
|
||||
# tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenized_example = tokenizer(
|
||||
*tuple(this_row),
|
||||
padding="max_length",
|
||||
max_length=custom_hpo_args.max_seq_length,
|
||||
max_length=hf_args.max_seq_length,
|
||||
truncation=True,
|
||||
)
|
||||
if task in NLG_TASKS:
|
||||
tokenized_example["decoder_input_ids"] = tokenized_example["input_ids"]
|
||||
tokenized_column_names = sorted(tokenized_example.keys())
|
||||
return [tokenized_example[x] for x in tokenized_column_names]
|
||||
tmp_column_names = sorted(tokenized_example.keys())
|
||||
if return_column_name:
|
||||
return [tokenized_example[x] for x in tmp_column_names], tmp_column_names
|
||||
else:
|
||||
return [tokenized_example[x] for x in tmp_column_names]
|
||||
|
||||
|
||||
def tokenize_text_multiplechoice(X, tokenizer, custom_hpo_args=None):
|
||||
def tokenize_text_multiplechoice(X, tokenizer, hf_args=None):
|
||||
import pandas
|
||||
|
||||
global tokenized_column_names
|
||||
|
||||
t = X[["sent1", "sent2", "ending0", "ending1", "ending2", "ending3"]]
|
||||
_, tokenized_column_names = tokenize_swag(
|
||||
t.iloc[0],
|
||||
tokenizer=tokenizer,
|
||||
hf_args=hf_args,
|
||||
return_column_name=True,
|
||||
)
|
||||
d = t.apply(
|
||||
lambda x: tokenize_swag(x, tokenizer, custom_hpo_args),
|
||||
lambda x: tokenize_swag(x, tokenizer=tokenizer, hf_args=hf_args),
|
||||
axis=1,
|
||||
result_type="expand",
|
||||
)
|
||||
|
@ -269,9 +306,7 @@ def tokenize_text_multiplechoice(X, tokenizer, custom_hpo_args=None):
|
|||
return output, None
|
||||
|
||||
|
||||
def tokenize_swag(this_row, tokenizer, custom_hpo_args=None):
|
||||
global tokenized_column_names
|
||||
|
||||
def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
|
||||
first_sentences = [[this_row["sent1"]] * 4]
|
||||
# get each 1st sentence, multiply to 4 sentences
|
||||
question_headers = this_row["sent2"]
|
||||
|
@ -289,11 +324,15 @@ def tokenize_swag(this_row, tokenizer, custom_hpo_args=None):
|
|||
tokenized_example = tokenizer(
|
||||
*tuple([first_sentences, second_sentences]),
|
||||
truncation=True,
|
||||
max_length=custom_hpo_args.max_seq_length,
|
||||
max_length=hf_args.max_seq_length,
|
||||
padding=False,
|
||||
)
|
||||
tokenized_column_names = sorted(tokenized_example.keys())
|
||||
return [tokenized_example[x] for x in tokenized_column_names]
|
||||
tmp_column_names = sorted(tokenized_example.keys())
|
||||
|
||||
if return_column_name:
|
||||
return [tokenized_example[x] for x in tmp_column_names], tmp_column_names
|
||||
else:
|
||||
return [tokenized_example[x] for x in tmp_column_names]
|
||||
|
||||
|
||||
def separate_config(config, task):
|
||||
|
@ -333,7 +372,9 @@ def get_num_labels(task, y_train):
|
|||
|
||||
|
||||
def is_a_list_of_str(this_obj):
|
||||
return isinstance(this_obj, list) and all(isinstance(x, str) for x in this_obj)
|
||||
return (isinstance(this_obj, list) or isinstance(this_obj, np.ndarray)) and all(
|
||||
isinstance(x, str) for x in this_obj
|
||||
)
|
||||
|
||||
|
||||
def _clean_value(value: Any) -> str:
|
||||
|
@ -386,14 +427,19 @@ def get_logdir_name(dirname, local_dir):
|
|||
return logdir
|
||||
|
||||
|
||||
def get_trial_fold_name(local_dir, trial_config, trial_id):
|
||||
global counter
|
||||
counter = counter + 1
|
||||
experiment_tag = "{0}_{1}".format(str(counter), format_vars(trial_config))
|
||||
logdir = get_logdir_name(
|
||||
_generate_dirname(experiment_tag, trial_id=trial_id), local_dir
|
||||
)
|
||||
return logdir
|
||||
class Counter:
|
||||
counter = 0
|
||||
|
||||
@staticmethod
|
||||
def get_trial_fold_name(local_dir, trial_config, trial_id):
|
||||
Counter.counter += 1
|
||||
experiment_tag = "{0}_{1}".format(
|
||||
str(Counter.counter), format_vars(trial_config)
|
||||
)
|
||||
logdir = get_logdir_name(
|
||||
_generate_dirname(experiment_tag, trial_id=trial_id), local_dir
|
||||
)
|
||||
return logdir
|
||||
|
||||
|
||||
def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
||||
|
@ -499,7 +545,7 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
|||
|
||||
def compute_checkpoint_freq(
|
||||
train_data_size,
|
||||
custom_hpo_args,
|
||||
hf_args,
|
||||
num_train_epochs,
|
||||
batch_size,
|
||||
):
|
||||
|
@ -508,7 +554,7 @@ def compute_checkpoint_freq(
|
|||
min(num_train_epochs, 1)
|
||||
* train_data_size
|
||||
/ batch_size
|
||||
/ custom_hpo_args.ckpt_per_epoch
|
||||
/ hf_args.ckpt_per_epoch
|
||||
)
|
||||
+ 1
|
||||
)
|
||||
|
@ -516,7 +562,7 @@ def compute_checkpoint_freq(
|
|||
|
||||
|
||||
@dataclass
|
||||
class HPOArgs:
|
||||
class HFArgs:
|
||||
"""The HPO setting.
|
||||
Args:
|
||||
output_dir (str): data root directory for outputing the log, etc.
|
||||
|
@ -534,7 +580,12 @@ class HPOArgs:
|
|||
|
||||
model_path: str = field(
|
||||
default="facebook/muppet-roberta-base",
|
||||
metadata={"help": "model path model for HPO"},
|
||||
metadata={"help": "model path for HPO"},
|
||||
)
|
||||
|
||||
tokenizer_model_path: str = field(
|
||||
default=None,
|
||||
metadata={"help": "tokenizer model path for HPO"},
|
||||
)
|
||||
|
||||
fp16: bool = field(default=True, metadata={"help": "whether to use the FP16 mode"})
|
||||
|
@ -552,12 +603,17 @@ class HPOArgs:
|
|||
|
||||
ckpt_per_epoch: int = field(default=1, metadata={"help": "checkpoint per epoch"})
|
||||
|
||||
per_device_eval_batch_size: int = field(
|
||||
default=1,
|
||||
metadata={"help": "per gpu evaluation batch size"},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def load_args():
|
||||
from dataclasses import fields
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
for each_field in fields(HPOArgs):
|
||||
for each_field in fields(HFArgs):
|
||||
print(each_field)
|
||||
arg_parser.add_argument(
|
||||
"--" + each_field.name,
|
||||
|
|
|
@ -79,7 +79,7 @@ class TrainingLogWriter(object):
|
|||
sample_size,
|
||||
):
|
||||
if self.file is None:
|
||||
raise IOError("Call open() to open the outpute file first.")
|
||||
raise IOError("Call open() to open the output file first.")
|
||||
if validation_loss is None:
|
||||
raise ValueError("TEST LOSS NONE ERROR!!!")
|
||||
record = TrainingLogRecord(
|
||||
|
@ -109,7 +109,7 @@ class TrainingLogWriter(object):
|
|||
|
||||
def checkpoint(self):
|
||||
if self.file is None:
|
||||
raise IOError("Call open() to open the outpute file first.")
|
||||
raise IOError("Call open() to open the output file first.")
|
||||
if self.current_best_loss_record_id is None:
|
||||
logger.warning(
|
||||
"flaml.training_log: checkpoint() called before any record is written, skipped."
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -4,12 +4,17 @@ from requests.exceptions import ChunkedEncodingError
|
|||
|
||||
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
from flaml.data import load_openml_dataset
|
||||
import urllib3
|
||||
|
||||
try:
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(
|
||||
dataset_id=1169, data_dir="test/", dataset_format=dataset_format
|
||||
)
|
||||
except (OpenMLServerException, ChunkedEncodingError) as e:
|
||||
except (
|
||||
OpenMLServerException,
|
||||
ChunkedEncodingError,
|
||||
urllib3.exceptions.ReadTimeoutError,
|
||||
) as e:
|
||||
print(e)
|
||||
return
|
||||
""" import AutoML class from flaml package """
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
def test_load_args_sub():
|
||||
from flaml.nlp.utils import HPOArgs
|
||||
from flaml.nlp.utils import HFArgs
|
||||
|
||||
HPOArgs.load_args()
|
||||
HFArgs.load_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -84,9 +84,10 @@ def test_hf_data():
|
|||
"task": "seq-classification",
|
||||
"metric": "accuracy",
|
||||
"log_file_name": "seqclass.log",
|
||||
"use_ray": False,
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
|
@ -116,7 +117,6 @@ def test_hf_data():
|
|||
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
|
||||
with open("automl.pkl", "rb") as f:
|
||||
automl = pickle.load(f)
|
||||
shutil.rmtree("test/data/output/")
|
||||
automl.predict(X_test)
|
||||
automl.predict(["test test", "test test"])
|
||||
automl.predict(
|
||||
|
@ -164,7 +164,7 @@ def _test_custom_data():
|
|||
"metric": "accuracy",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
@ -183,6 +183,16 @@ def _test_custom_data():
|
|||
]
|
||||
)
|
||||
|
||||
import pickle
|
||||
|
||||
automl.pickle("automl.pkl")
|
||||
|
||||
with open("automl.pkl", "rb") as f:
|
||||
automl = pickle.load(f)
|
||||
config = automl.best_config.copy()
|
||||
config["learner"] = automl.best_estimator
|
||||
automl.trainable(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_hf_data()
|
||||
|
|
|
@ -52,7 +52,7 @@ def test_classification_head():
|
|||
"metric": "accuracy",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -19,8 +19,7 @@ def custom_metric(
|
|||
from flaml.model import TransformersEstimator
|
||||
|
||||
if estimator._trainer is None:
|
||||
estimator._init_model_for_predict(X_test)
|
||||
trainer = estimator._trainer
|
||||
trainer, _, _ = estimator._init_model_for_predict(X_test)
|
||||
estimator._trainer = None
|
||||
else:
|
||||
trainer = estimator._trainer
|
||||
|
@ -103,7 +102,7 @@ def test_custom_metric():
|
|||
"log_file_name": "seqclass.log",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -43,7 +43,7 @@ def test_cv():
|
|||
"n_splits": 3,
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -216,7 +216,7 @@ def test_mcc():
|
|||
"log_file_name": "seqclass.log",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -6,6 +6,9 @@ import pytest
|
|||
def test_regression():
|
||||
try:
|
||||
import ray
|
||||
|
||||
if not ray.is_initialized():
|
||||
ray.init()
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
@ -65,10 +68,10 @@ def test_regression():
|
|||
"task": "seq-regression",
|
||||
"metric": "pearsonr",
|
||||
"starting_points": {"transformer": {"num_train_epochs": 1}},
|
||||
"use_ray": True,
|
||||
"use_ray": {"local_dir": "data/outut/"},
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
@ -77,6 +80,7 @@ def test_regression():
|
|||
|
||||
ray.shutdown()
|
||||
ray.init()
|
||||
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||
)
|
||||
|
|
|
@ -58,7 +58,7 @@ def test_summarization():
|
|||
"log_file_name": "seqclass.log",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "patrickvonplaten/t5-tiny-random",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -726,7 +726,7 @@ def test_tokenclassification():
|
|||
"metric": "seqeval",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "bert-base-uncased",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
|
|
|
@ -81,7 +81,7 @@ def _test_hf_data():
|
|||
"use_ray": True,
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
automl_settings["hf_args"] = {
|
||||
"model_path": "facebook/muppet-roberta-base",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
|
|
|
@ -26,8 +26,8 @@ automl = AutoML()
|
|||
automl_settings = {
|
||||
"time_budget": 100,
|
||||
"task": "seq-classification",
|
||||
"custom_hpo_args": {"output_dir": "data/output/"},
|
||||
"gpu_per_trial": 1, # set to 0 if no GPU is available
|
||||
"hf_args": {"output_dir": "data/output/"}, # setting the huggingface arguments: output directory
|
||||
"gpu_per_trial": 1, # set to 0 if no GPU is available
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
|
||||
automl.predict(X_test)
|
||||
|
@ -77,11 +77,11 @@ automl_settings = {
|
|||
"task": "seq-regression",
|
||||
"metric": "rmse",
|
||||
}
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
"fp16": False,
|
||||
automl_settings["hf_args"] = { # setting the huggingface arguments
|
||||
"model_path": "google/electra-small-discriminator", # setting the language model
|
||||
"output_dir": "data/output/", # setting the output directory
|
||||
"ckpt_per_epoch": 5, # setting the number of checkpoints per epoch
|
||||
"fp16": False, # setting whether to use FP16
|
||||
}
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||
|
@ -127,11 +127,11 @@ automl_settings = {
|
|||
"task": "summarization",
|
||||
"metric": "rouge1",
|
||||
}
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "t5-small",
|
||||
"output_dir": "data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
"fp16": False,
|
||||
automl_settings["hf_args"] = { # setting the huggingface arguments
|
||||
"model_path": "t5-small", # setting the language model
|
||||
"output_dir": "data/output/", # setting the output directory
|
||||
"ckpt_per_epoch": 5, # setting the number of checkpoints per epoch
|
||||
"fp16": False, # setting whether to use FP16
|
||||
}
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||
|
@ -205,4 +205,10 @@ Model config T5Config {
|
|||
}
|
||||
```
|
||||
|
||||
For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
|
||||
For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
|
||||
|
||||
### Link to Jupyter notebook
|
||||
|
||||
To run these examples in our Jupyter notebook, please go to:
|
||||
|
||||
[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_nlp.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_nlp.ipynb)
|
Loading…
Reference in New Issue