I am running hyperparameter tuning with the Ray Tune integration (1.9.2) and the Hugging Face Transformers framework (4.15.0).
Here is the code responsible for the process (based on this example):
from typing import Any, Dict, Optional

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import PopulationBasedTraining
from transformers import Trainer


def search_hyper_parameters(
    trainer: Trainer, name: Optional[str] = None, n_trials: int = 5
) -> Dict[str, Any]:
    # Search space used for the initial sampling of each trial.
    def get_hp_boundaries(_trial):
        return {
            "adam_beta1": tune.uniform(0.0, 1.0),
            "adam_beta2": tune.uniform(0.0, 1.0),
            "learning_rate": tune.loguniform(1e-6, 1e-4),
            "num_train_epochs": tune.choice(list(range(3, 8))),
            "seed": tune.randint(1, 100),
            "per_device_train_batch_size": tune.choice([4, 8, 16, 32, 64]),
        }

    # PBT scheduler: exploited trials mutate these hyperparameters
    # on every training iteration.
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="eval_accuracy",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations={
            "adam_beta1": tune.uniform(0.0, 1.0),
            "adam_beta2": tune.uniform(0.0, 1.0),
            "adam_epsilon": tune.choice([1e-8, 2e-8, 3e-8, 1e-9, 2e-9, 3e-10]),
            "weight_decay": tune.uniform(0.0, 0.3),
            "learning_rate": tune.uniform(1e-5, 5e-5),
            "max_grad_norm": tune.uniform(0.0, 1.0),
        },
    )

    # Console reporter with shortened column labels.
    reporter = CLIReporter(
        parameter_columns={
            "adam_epsilon": "adam_epsilon",
            "adam_beta1": "adam_beta1",
            "adam_beta2": "adam_beta2",
            "weight_decay": "w_decay",
            "seed": "seed",
            "learning_rate": "lr",
            "per_device_train_batch_size": "train_bs/gpu",
            "num_train_epochs": "num_epochs",
            "max_grad_norm": "max_grad_norm",
        },
        metric_columns=[
            "eval_accuracy",
            "eval_loss",
            "epoch",
            "training_iteration",
        ],
    )

    best_trial = trainer.hyperparameter_search(
        backend="ray",
        hp_space=get_hp_boundaries,
        direction="maximize",
        n_trials=n_trials,
        resources_per_trial={"cpu": 1, "gpu": 1},
        scheduler=scheduler,
        keep_checkpoints_num=1,
        checkpoint_score_attr="training_iteration",
        stop={"eval_accuracy": 0.98},
        progress_reporter=reporter,
        local_dir="~/ray_results/",
        name=name,
        log_to_file=True,
    )
    return best_trial.hyperparameters
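For context, here is a sketch of the call site. The Trainer construction is simplified: training_args, model_init, the datasets, and compute_metrics are placeholders for my actual pipeline, and compute_metrics must produce an accuracy metric, since the scheduler and the stop condition read eval_accuracy.

# Call-site sketch; training_args, model_init, and the datasets are placeholders.
# hyperparameter_search requires model_init so every trial starts from a fresh model.
trainer = Trainer(
    args=training_args,
    model_init=model_init,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # "accuracy" is reported as "eval_accuracy"
)
best_params = search_hyper_parameters(trainer, name="pbt_search", n_trials=5)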
Sometimes, the tuning fails with an Invalid beta parameter error:
File "/app/lib/python3.8/site-packages/ray/tune/function_runner.py", line 262, in run
self._entrypoint()
File "/app/lib/python3.8/site-packages/ray/tune/function_runner.py", line 330, in entrypoint
return self._trainable_func(self.config, self._status_reporter,
File "/app/lib/python3.8/site-packages/ray/tune/function_runner.py", line 597, in _trainable_func
output = fn()
File "/app/lib/python3.8/site-packages/transformers/integrations.py", line 282, in dynamic_modules_import_trainable
return trainable(*args, **kwargs)
File "/app/lib/python3.8/site-packages/ray/tune/utils/trainable.py", line 344, in inner
trainable(config, **fn_kwargs)
File "/app/lib/python3.8/site-packages/transformers/integrations.py", line 183, in _objective
local_trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
File "/app/lib/python3.8/site-packages/transformers/trainer.py", line 1181, in train
self.create_optimizer_and_scheduler(num_training_steps=max_steps)
File "/app/lib/python3.8/site-packages/transformers/trainer.py", line 801, in create_optimizer_and_scheduler
self.create_optimizer()
File "/app/lib/python3.8/site-packages/transformers/trainer.py", line 842, in create_optimizer
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
File "/app/lib/python3.8/site-packages/transformers/optimization.py", line 306, in __init__
raise ValueError(f"Invalid beta parameter: {betas[1]} - should be in [0.0, 1.0)")
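For reference, the check that raises this lives in the AdamW constructor in transformers.optimization, so it can be reproduced outside of Ray with any beta outside [0.0, 1.0); the value below is just an illustrative out-of-range example:

# Minimal reproduction of the beta validation in transformers' AdamW.
# 1.08 is an arbitrary out-of-range value chosen for illustration only.
import torch
from transformers.optimization import AdamW

params = [torch.nn.Parameter(torch.zeros(1))]
AdamW(params, lr=1e-5, betas=(0.9, 1.08))
# -> ValueError: Invalid beta parameter: 1.08 - should be in [0.0, 1.0)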
I defined hyperparameter boundaries for the adam_beta1 and adam_beta2 values.
Which configuration can cause this error?