deep-learning - 如果我的回归模型的损失停滞在较高的值，我该怎么办？

Question

我正在使用神经网络解决一个回归问题：我有 3 个特征，要预测一个连续值。我注意到神经网络一开始学习得很好，但在 10 个 epoch 之后，损失就停在一个较高的值上，无法再继续下降。

我尝试用 Adam 和其他自适应优化器代替 SGD，但没有奏效。我还尝试了更复杂的架构，例如增加层数和神经元数量、加入批量归一化、更换激活函数等，但也没有用。
我尝试调试，想找出实现是否有问题，但当我只使用 10 个数据样本时，模型学习得很快，因此实现应该没有错误。随后我逐步增加数据样本的数量并监控模型的结果。当达到 3000 个数据样本时，模型的损失开始停滞在较高的值。
我尝试过增加层数和神经元数量，也尝试过其他激活函数和批量归一化。我的输入数据已归一化到 [-1, 1] 之间，但目标值没有归一化，因为这是回归问题，我要预测的是连续值。我也尝试过使用 Keras，但得到了相同的结果。

我的真实数据集有 40000 条数据，我不知道还应该尝试什么。我几乎试遍了我所知道的所有优化方法，但没有一个有效。如果有人可以指导我，我将不胜感激。下面是我的代码，它可能比较混乱、不易理解，但我确信我的实现没有问题。我使用的是 skorch/PyTorch 以及一些 scikit-learn 函数：

# Use every column except the last two (bearing and distance) as input
# features.
# NOTE: with a small subset the model learns well, but from about 3000 rows
# onwards the loss plateaus: it starts around 15 and stalls near 9.  On the
# whole dataset it starts around 47 and stalls near 36.
X = dataset.iloc[:3000, 0:-2].to_numpy().astype(np.float32)

# The last two columns are the regression targets: bearing, then distance.
y = dataset.iloc[:3000, -2:].to_numpy().astype(np.float32)
y_bearing = y[:, 0].reshape(-1, 1)
y_distance = y[:, 1].reshape(-1, 1)

# Split the row indices once (same test_size / random_state as before) so the
# bearing and distance models are trained and evaluated on the same rows.
train_idx, test_idx = train_test_split(np.arange(len(X)),
                                       test_size=0.1,
                                       random_state=42,
                                       shuffle=True)

# Standardise the inputs.  The scaler is fitted on the training rows only so
# that statistics of the held-out rows do not leak into training; the fitted
# transform is then applied to every row.
scaler = StandardScaler()
scaler.fit(X[train_idx])
X_norm = scaler.transform(X)

X_br_train, X_br_test = X_norm[train_idx], X_norm[test_idx]
y_br_train, y_br_test = y_bearing[train_idx], y_bearing[test_idx]

X_dis_train, X_dis_test = X_norm[train_idx], X_norm[test_idx]
y_dis_train, y_dis_test = y_distance[train_idx], y_distance[test_idx]

bearing_trainset = Dataset(X_br_train, y_br_train)
bearing_testset = Dataset(X_br_test, y_br_test)

distance_trainset = Dataset(X_dis_train, y_dis_train)
distance_testset = Dataset(X_dis_test, y_dis_test)


def root_mse(y_true, y_pred):
    """Return the root of the mean squared error between targets and predictions."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)


class RMSELoss(nn.Module):
    """Root-mean-squared-error criterion built on top of ``nn.MSELoss``.

    Args:
        eps: small constant added to the MSE before taking the square root.
            The derivative of ``sqrt`` is infinite at 0, so without it a
            batch that is fitted exactly produces NaN gradients.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y) + eps)`` as a scalar tensor."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)


class AED(nn.Module):
    """Average Euclidean distance loss.

    Computes the Euclidean (L2) norm of ``yhat - y`` along the last dimension
    and averages those norms over all remaining dimensions, so the value does
    not grow with the number of samples in the batch.
    """

    def forward(self, yhat, y):
        """Return the mean Euclidean distance between ``yhat`` and ``y``."""
        diff = yhat - y
        if diff.dim() == 0:
            # A scalar has no last dimension to reduce over; its distance is
            # simply the absolute difference.
            return diff.abs()
        return torch.linalg.vector_norm(diff, ord=2, dim=-1).mean()


def train(on_target,
          hidden_units,
          batch_size,
          epochs,
          optimizer,
          lr,
          regularisation_factor,
          train_shuffle):
    """Train the regression network for one target and return the fitted model.

    Args:
        on_target: which target to train on, ``'bearing'`` or ``'distance'``
            (case-insensitive).  Also used verbatim in the checkpoint path
            ``results/{on_target}/checkpoints``.
        hidden_units: passed to the network as ``n_hidden``.
        batch_size: mini-batch size given to the skorch regressor.
        epochs: maximum number of training epochs.
        optimizer: ``'adam'`` (case-insensitive) selects ``optim.Adam``; any
            other value selects ``optim.SGD``.
        lr: learning rate.
        regularisation_factor: passed to the optimizer as ``weight_decay``.
        train_shuffle: passed as ``iterator_train__shuffle``.

    Returns:
        The ``NeuralNetRegressor`` after ``fit`` has been called on it.

    Raises:
        ValueError: if ``on_target`` is neither ``'bearing'`` nor
            ``'distance'``.
    """
    # Normalise once and reject unknown targets up front; otherwise the
    # regressor would be built with ``module=None`` on the bearing data.
    target = on_target.lower()
    if target not in ('bearing', 'distance'):
        raise ValueError(
            f"on_target must be 'bearing' or 'distance', got {on_target!r}")

    if target == 'distance':
        trainset, testset = distance_trainset, distance_testset
    else:
        trainset, testset = bearing_trainset, bearing_testset
    print(f"shape of trainset.X = {trainset.X.shape}, shape of trainset.y = {trainset.y.shape}")
    print(f"shape of testset.X = {testset.X.shape}, shape of testset.y = {testset.y.shape}")

    # Extra per-epoch metrics reported next to the training criterion.
    mse = EpochScoring(scoring=mean_squared_error, lower_is_better=True, name='MSE')
    r2 = EpochScoring(scoring=r2_score, lower_is_better=False, name='R2')
    rmse = EpochScoring(scoring=make_scorer(root_mse), lower_is_better=True, name='RMSE')

    checkpoint = Checkpoint(dirname=f'results/{on_target}/checkpoints')
    train_end_checkpoint = TrainEndCheckpoint(dirname=f'results/{on_target}/checkpoints')

    # Size the output layer from the target array that is actually trained on.
    if target == 'bearing':
        network = BearingNetwork(n_features=X_norm.shape[1],
                                 n_hidden=hidden_units,
                                 n_out=y_bearing.shape[1])
    else:
        network = DistanceNetwork(n_features=X_norm.shape[1],
                                  n_hidden=hidden_units,
                                  n_out=y_distance.shape[1])

    model = NeuralNetRegressor(
        module=network,
        criterion=RMSELoss,
        device='cpu',
        batch_size=batch_size,
        lr=lr,
        optimizer=optim.Adam if optimizer.lower() == 'adam' else optim.SGD,
        optimizer__weight_decay=regularisation_factor,
        max_epochs=epochs,
        iterator_train__shuffle=train_shuffle,

        # The held-out set is used as the validation split during training.
        train_split=predefined_split(testset),
        callbacks=[mse, r2, rmse, checkpoint, train_end_checkpoint]
    )

    print(f"{'*' * 10} start training the {on_target} model {'*' * 10}")
    # The targets are carried by the dataset itself, hence y=None.
    model.fit(trainset, y=None)

    print(f"{'*' * 10} End Training the {on_target} Model {'*' * 10}")
    return model


if __name__ == '__main__':
    # Read the command-line options and forward them to train().
    args = parser.parse_args()

    train(
        on_target=args.on_target,
        hidden_units=args.hidden_units,
        batch_size=args.batch_size,
        epochs=args.epochs,
        optimizer=args.optimizer,
        lr=args.learning_rate,
        regularisation_factor=args.regularisation_lambda,
        train_shuffle=args.shuffle,
    )

这是我的网络声明：

class DistanceNetwork(nn.Module):
    """Separate feed-forward network for predicting distance.

    Layers: ``n_features -> n_hidden -> 5 -> n_out`` with ``LeakyReLU``
    between the linear layers and no activation on the output.

    Args:
        n_features: number of input features.
        n_hidden: width of the first hidden layer.
        n_out: number of output values.
    """
    def __init__(self, n_features=5, n_hidden=16, n_out=1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(n_features, n_hidden),
            nn.LeakyReLU(),
            nn.Linear(n_hidden, 5),
            nn.LeakyReLU(),
            nn.Linear(5, n_out)
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        # Without this method nn.Module raises NotImplementedError on call.
        return self.model(x)

这是训练时的日志：

deep-learning - 如果我的回归模型的损失停滞在较高的值，我该怎么办？

0 回答 0

Related

Reference