While trying to optimize my model with Optuna, I ran into the following problem: after the first trials finish, optimization becomes extremely slow. I am running 100 trials with n_jobs=-1. I also noticed that resuming the study is very slow. The first trials finished within a few hours, but after I resumed the study and let it run for another 4 hours, no further trials completed.
Below are the timestamps at which trials finished in the study I created yesterday. As you can see, many trials finished during the first few hours, but then progress slowed down drastically. It has now been 4 hours since the last trial finished, and almost 12 hours since the one before that.
[I 2022-02-06 18:02:34,335] A new study created with name: Optuna_100trials_v1
[I 2022-02-06 18:54:40,153] Trial 10 finished
[I 2022-02-06 18:57:44,496] Trial 3 finished
[I 2022-02-06 19:03:30,819] Trial 5 finished
[I 2022-02-06 19:14:28,983] Trial 21 finished
[I 2022-02-06 19:16:04,672] Trial 18 finished
[I 2022-02-06 19:47:55,132] Trial 13 finished
[I 2022-02-06 19:49:19,882] Trial 16 finished
[I 2022-02-06 19:53:21,124] Trial 6 finished
[I 2022-02-06 19:57:54,052] Trial 1 finished
[I 2022-02-06 19:59:00,715] Trial 17 finished
[I 2022-02-06 20:03:12,866] Trial 7 finished
[I 2022-02-06 20:03:59,517] Trial 26 finished
[I 2022-02-06 20:22:56,610] Trial 11 finished
[I 2022-02-06 21:06:18,959] Trial 27 finished
[I 2022-02-06 21:24:02,737] Trial 12 finished
[I 2022-02-06 21:43:53,425] Trial 29 finished
[I 2022-02-06 21:44:39,988] Trial 30 finished
[I 2022-02-07 05:40:17,852] Trial 8 finished
[I 2022-02-07 09:10:17,852] No new Trial finished -- 4 Hours since last Trial
I don't know what is causing this, so I am posting my optimization code here.
study = optuna.create_study(direction='minimize', study_name='Optuna_100trials_v1', storage='sqlite:///example2.db', load_if_exists=True)
study.optimize(objective, n_trials=100, n_jobs=-1)
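For clarity, resuming goes through the same storage: re-running the create_study call above with load_if_exists=True reattaches to the existing study in example2.db instead of creating a new one, which (as far as I understand) is equivalent to loading it explicitly:

import optuna

# Reattach to the study persisted in the SQLite file and continue optimizing
study = optuna.load_study(study_name='Optuna_100trials_v1', storage='sqlite:///example2.db')
study.optimize(objective, n_trials=100, n_jobs=-1)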
def objective(trial):
    lr = trial.suggest_float("lr", 0.01, 0.2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    # T0 (the scheduler's restart period) is scaled to the batch size
    if batch_size == 8:
        T0 = trial.suggest_discrete_uniform("T0", 34, 170, 17)
    if batch_size == 16:
        T0 = trial.suggest_discrete_uniform("T0", 17, 82, 8)
    if batch_size == 32:
        T0 = trial.suggest_discrete_uniform("T0", 8, 40, 4)
    if batch_size == 64:
        T0 = trial.suggest_discrete_uniform("T0", 6, 30, 3)
    T0 = int(T0)
    n_hidden = trial.suggest_categorical("n_hidden", [8, 16, 32, 64, 128])
    weight_decay = trial.suggest_loguniform("weight_decay", 0.0000001, 0.00001)
    eta_min = trial.suggest_loguniform("eta_min", 0.00001, 0.01)
    T_mult = trial.suggest_categorical("T_mult", [1, 2])
    dropout = trial.suggest_discrete_uniform("dropout", 0, 1, 0.1)
    n_layers = trial.suggest_categorical("n_layers", [1, 2, 3])
    activation_function = trial.suggest_categorical("activation_function", ["linear", "ReLu"])
    model_type = trial.suggest_categorical("model_type", ["Model_GRU", "Model_GRU_1", "Model_GRU_2"])

    global best_val_loss
    input_dim = feature_dim + features_added
    output_dim = 3
    n_epochs = 100
    iterations_per_epoch = 190
    best_val_loss = 100
    patience, trials = 30, 0
    # More epochs for larger batch sizes (fewer iterations per epoch)
    if batch_size == 8:
        n_epochs = 120
    if batch_size == 16:
        n_epochs = 200
    if batch_size == 32:
        n_epochs = 380
    if batch_size == 64:
        n_epochs = 500

    fold_mean_vall_loss = []
    val_loss_mean = []
    val_loss_log = []
    for fold, (train_idx, test_idx) in enumerate(kfold.split(sequences, y_kfold)):
        patience, trials = 40, 0
        # Pick the model class from the sampled model type and activation
        if model_type == "Model_GRU" and activation_function == "linear":
            model = Model_GRU(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_1" and activation_function == "linear":
            model = Model_GRU_1(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_2" and activation_function == "linear":
            model = Model_GRU_2(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_3" and activation_function == "linear":
            model = Model_GRU_3(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU" and activation_function == "ReLu":
            model = Model_GRU_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_1" and activation_function == "ReLu":
            model = Model_GRU_1_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_2" and activation_function == "ReLu":
            model = Model_GRU_2_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        elif model_type == "Model_GRU_3" and activation_function == "ReLu":
            model = Model_GRU_3_relu(input_dim, output_dim, n_hidden, n_layers, dropout)
        else:
            print("No Model !")
        class_weights = torch.tensor([1, 2, 2]).float()
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, T_0=T0, T_mult=T_mult, eta_min=eta_min, last_epoch=-1)
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_idx)
        dataset = BrakeNoiseData(sequences)
        trainloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=train_subsampler)
        valloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=test_subsampler)
        train_losses = []
        train_accuracys = []
        validation_losses = []
        validation_accuracys = []
        roc_list = []
        lr_loss = []
        for epoch in range(n_epochs):
            running_loss = 0
            train_acc = 0
            for i, (inputs, labels) in enumerate(trainloader):