
I wrote a snippet to classify Omniglot images. I compute the training and validation loss at each epoch, where the latter is calculated using images the network has never seen before. The two plots are shown below:

[Plot: training loss]  [Plot: validation loss]

Since the training loss decreases while the validation loss increases, I concluded that my model is overfitting. I have tried several suggestions (e.g. here) to overcome this, including:

  • Increasing the size of the training set.
  • Shuffling the data.
  • Adding dropout layers (up to p=0.9).
  • Using a smaller model.
  • Changing the architecture.
  • Changing the learning rate.
  • Reducing the batch size.
  • Adding weight decay (dropout and weight decay were wired in roughly as in the sketch after this list).

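For concreteness, this sketch shows roughly how I added dropout and weight decay (MyModel is the class defined in the full code further down; the dropout placement and the weight-decay value here are illustrative, not my exact training code):

import torch.nn as nn
from torch import optim

class MyModelWithDropout(MyModel):
    def __init__(self):
        super().__init__()
        self.drop = nn.Dropout(p=0.9)  # tried values up to p=0.9

    def forward(self, x):
        y1 = self.pool(self.bn1(self.relu(self.cn1(x))))
        y2 = self.pool(self.bn2(self.relu(self.cn2(y1))))
        y3 = self.relu(self.bn3(self.cn3(y2)))
        y3 = y3.view(y3.size(0), -1)
        # dropout applied to the fully connected layers only
        y5 = self.drop(self.sopl(self.fc1(y3)))
        y6 = self.drop(self.sopl(self.fc2(y5)))
        return self.fc3(y6)

# weight decay (L2 penalty) is passed to the optimizer; 1e-4 is an illustrative value
model = MyModelWithDropout()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)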
However, the validation loss still increases. I would like to know whether there are any other suggestions to improve this behavior, or whether this is not overfitting at all and the problem lies elsewhere. Below is the code used in this question.

import torch
import torchvision
import torchvision.transforms as transforms

from torch import nn, optim
from torch.utils.data import DataLoader


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()

        dim_out = 964

        # -- embedding params
        self.cn1 = nn.Conv2d(1, 16, 7)
        self.cn2 = nn.Conv2d(16, 32, 4)
        self.cn3 = nn.Conv2d(32, 64, 3)
        self.pool = nn.MaxPool2d(2)

        self.bn1 = nn.BatchNorm2d(16)
        self.bn2 = nn.BatchNorm2d(32)
        self.bn3 = nn.BatchNorm2d(64)

        # -- prediction params
        self.fc1 = nn.Linear(256, 170)
        self.fc2 = nn.Linear(170, 50)
        self.fc3 = nn.Linear(50, dim_out)

        # -- non-linearity
        self.relu = nn.ReLU()
        self.Beta = 10
        self.sopl = nn.Softplus(beta=self.Beta)

    def forward(self, x):

        y1 = self.pool(self.bn1(self.relu(self.cn1(x))))
        y2 = self.pool(self.bn2(self.relu(self.cn2(y1))))
        y3 = self.relu(self.bn3(self.cn3(y2)))


        y3 = y3.view(y3.size(0), -1)

        y5 = self.sopl(self.fc1(y3))
        y6 = self.sopl(self.fc2(y5))

        return self.fc3(y6)


class Train:
    def __init__(self):

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # -- data
        dim = 28
        batch_size = 400
        my_transforms = transforms.Compose([transforms.Resize((dim, dim)), transforms.ToTensor()])
        trainset = torchvision.datasets.Omniglot(root="./data/omniglot_train/", download=False,
                                                 transform=my_transforms)
        validset = torchvision.datasets.Omniglot(root="./data/omniglot_train/", background=False, download=False,
                                                 transform=my_transforms)
        self.TrainDataset = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
        self.ValidDataset = DataLoader(dataset=validset, batch_size=len(validset), shuffle=False)
        self.N_train = len(trainset)
        self.N_valid = len(validset)

        # -- model
        self.model = MyModel().to(self.device)

        # -- train
        self.epochs = 3000
        self.loss = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def train_epoch(self):

        self.model.train()
        train_loss = 0
        for batch_idx, data_batch in enumerate(self.TrainDataset):
            # -- predict
            predict = self.model(data_batch[0].to(self.device))

            # -- loss
            loss = self.loss(predict, data_batch[1].to(self.device))

            # -- optimize
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()

        return train_loss/(batch_idx+1)

    def valid_epoch(self):

        with torch.no_grad():
            self.model.eval()
            # the validation loader yields the whole set as a single batch
            for data_batch in self.ValidDataset:
                # -- predict
                predict = self.model(data_batch[0].to(self.device))

                # -- loss
                loss = self.loss(predict, data_batch[1].to(self.device))

        return loss.item()

    def __call__(self):

        for epoch in range(self.epochs):
            train_loss = self.train_epoch()
            valid_loss = self.valid_epoch()

            print('Epoch {}: Training loss = {:.5f}, Validation loss = {:.5f}.'.format(epoch, train_loss, valid_loss))

        torch.save(self.model.state_dict(), './model_stat.pth')


if __name__ == '__main__':
    my_train = Train()
    my_train()

1 Answer


If your training accuracy is good but your test accuracy (on data not used during training) is poor, then you have an overfitting problem. I had the same problem with a CNN model. There are two methods you can use to overcome overfitting: the first is early stopping, and the second is regularization. Check the following example:

import tensorflow as tf
from tensorflow import keras

# L2 regularizers for layers
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(32, 32)),
    keras.layers.Reshape(target_shape=(32, 32, 1)),
    keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu, use_bias=True,
                        kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation='softmax', use_bias=True)
])


model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

# Early stopping: halt training once the validation loss stops improving
history = model.fit(X_train, Y_train,
                    validation_data=(X_dev, Y_dev),
                    epochs=4000,
                    callbacks=[EarlyStopping(monitor='val_loss')])

Don't forget the import for early stopping:

from tensorflow.keras.callbacks import EarlyStopping
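
Since your code is in PyTorch rather than Keras, here is a minimal sketch of the same two ideas in your setup. It reuses the Train class from your question; the patience value and the weight-decay value are my assumptions, not taken from your code.

import torch
from torch import optim

my_train = Train()  # the Train class from the question

# regularization: L2 penalty via the optimizer's weight_decay (0.01 is illustrative)
my_train.optimizer = optim.Adam(my_train.model.parameters(), lr=1e-3, weight_decay=0.01)

# early stopping: stop once validation loss has not improved for `patience` epochs
best_valid_loss = float('inf')
patience = 20  # illustrative; tune to taste
epochs_no_improve = 0

for epoch in range(my_train.epochs):
    train_loss = my_train.train_epoch()
    valid_loss = my_train.valid_epoch()
    print('Epoch {}: Training loss = {:.5f}, Validation loss = {:.5f}.'.format(epoch, train_loss, valid_loss))

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        epochs_no_improve = 0
        torch.save(my_train.model.state_dict(), './model_stat.pth')  # keep the best checkpoint
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping at epoch {}.'.format(epoch))
            break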
answered 2022-02-11 at 15:24:58