While trying to optimize my model with Optuna, I ran into the following problem: after the first trials finish, optimization becomes extremely slow. I am running 100 trials with n_jobs=-1. I also noticed that resuming the study is very slow. The first trials finished within a few hours, but after I resumed the study and let it run for another 4 hours, no further trials completed.
Below are the timestamps at which trials finished in the study I created yesterday. As you can see, many trials finished during the first few hours, but then progress slowed down drastically. It has now been 4 hours since the last trial finished, and almost 12 hours since the one before that.
[I 2022-02-06 18:02:34,335] A new study created with name: Optuna_100trials_v1
[I 2022-02-06 18:54:40,153] Trial 10 finished
[I 2022-02-06 18:57:44,496] Trial 3 finished
[I 2022-02-06 19:03:30,819] Trial 5 finished
[I 2022-02-06 19:14:28,983] Trial 21 finished
[I 2022-02-06 19:16:04,672] Trial 18 finished
[I 2022-02-06 19:47:55,132] Trial 13 finished
[I 2022-02-06 19:49:19,882] Trial 16 finished
[I 2022-02-06 19:53:21,124] Trial 6 finished
[I 2022-02-06 19:57:54,052] Trial 1 finished
[I 2022-02-06 19:59:00,715] Trial 17 finished
[I 2022-02-06 20:03:12,866] Trial 7 finished
[I 2022-02-06 20:03:59,517] Trial 26 finished
[I 2022-02-06 20:22:56,610] Trial 11 finished
[I 2022-02-06 21:06:18,959] Trial 27 finished
[I 2022-02-06 21:24:02,737] Trial 12 finished
[I 2022-02-06 21:43:53,425] Trial 29 finished
[I 2022-02-06 21:44:39,988] Trial 30 finished
[I 2022-02-07 05:40:17,852] Trial 8 finished
[I 2022-02-07 09:10:17,852] No new Trial finished -- 4 Hours since last Trial
I don't know what is causing this, so I am posting my optimization code here.
study = optuna.create_study(direction='minimize', study_name='Optuna_100trials_v1', storage='sqlite:///example2.db', load_if_exists=True)
study.optimize(objective, n_trials=100, n_jobs=-1)
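For clarity, resuming goes through the same storage: re-running the create_study call above with load_if_exists=True reattaches to the existing study in example2.db instead of creating a new one, which (as far as I understand) is equivalent to loading it explicitly:

import optuna

# Reattach to the study persisted in the SQLite file and continue optimizing
study = optuna.load_study(study_name='Optuna_100trials_v1', storage='sqlite:///example2.db')
study.optimize(objective, n_trials=100, n_jobs=-1)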
def objective(trial):
    lr = trial.suggest_float("lr", 0.01, 0.2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    # T0 (the scheduler's restart period) is scaled to the batch size
    if batch_size == 8:
        T0 = trial.suggest_discrete_uniform("T0", 34, 170, 17)
    if batch_size == 16:
        T0 = trial.suggest_discrete_uniform("T0", 17, 82, 8)
    if batch_size == 32:
        T0 = trial.suggest_discrete_uniform("T0", 8, 40, 4)
    if batch_size == 64:
        T0 = trial.suggest_discrete_uniform("T0", 6, 30, 3)
    T0 = int(T0)
    n_hidden = trial.suggest_categorical("n_hidden", [8, 16, 32, 64, 128])
    weight_decay = trial.suggest_loguniform("weight_decay", 0.0000001, 0.00001)
    eta_min = trial.suggest_loguniform("eta_min", 0.00001, 0.01)
    T_mult = trial.suggest_categorical("T_mult", [1, 2])
    dropout = trial.suggest_discrete_uniform("dropout", 0, 1, 0.1)
    n_layers = trial.suggest_categorical("n_layers", [1, 2, 3])
    activation_function = trial.suggest_categorical("activation_function", ["linear", "ReLu"])
    model_type = trial.suggest_categorical("model_type", ["Model_GRU", "Model_GRU_1", "Model_GRU_2"])

    global best_val_loss
    input_dim = feature_dim + features_added
    output_dim = 3
    n_epochs = 100
    iterations_per_epoch = 190
    best_val_loss = 100
    patience, trials = 30, 0
    # More epochs for larger batch sizes (fewer iterations per epoch)
    if batch_size == 8:
        n_epochs = 120
    if batch_size == 16:
        n_epochs = 200
    if batch_size == 32:
        n_epochs = 380
    if batch_size == 64:
        n_epochs = 500

    fold_mean_vall_loss = []
    val_loss_mean = []
    val_loss_log = []
    for fold, (train_idx, test_idx) in enumerate(kfold.split(sequences, y_kfold)):
        patience, trials = 40, 0
        # Pick the model class from the sampled model type and activation
        if model_type == "Model_GRU" and activation_function == "linear":
            model = Model_GRU(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_1" and activation_function == "linear":
            model = Model_GRU_1(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_2" and activation_function == "linear":
            model = Model_GRU_2(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_3" and activation_function == "linear":
            model = Model_GRU_3(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU" and activation_function == "ReLu":
            model = Model_GRU_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_1" and activation_function == "ReLu":
            model = Model_GRU_1_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_2" and activation_function == "ReLu":
            model = Model_GRU_2_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_3" and activation_function == "ReLu":
            model = Model_GRU_3_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        else:
            print("No Model !")
        class_weights = torch.tensor([1, 2, 2]).float()
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, T_0=T0, T_mult=T_mult, eta_min=eta_min, last_epoch=-1)
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_idx)
        dataset = BrakeNoiseData(sequences)
        trainloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=train_subsampler)
        valloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=test_subsampler)
        train_losses = []
        train_accuracys = []
        validation_losses = []
        validation_accuracys = []
        roc_list = []
        lr_loss = []
        for epoch in range(n_epochs):
            running_loss = 0
            train_acc = 0
            for i, (inputs, labels) in enumerate(trainloader):