machine-learning - Inception V2 在 Inception V1 上没有改进

Question

我正在关注 D2L 书籍，GoogLeNet 章节的练习之一是从 Rethinking the Inception Architecture for Computer Vision 论文中获取模型，所以我这样做了，并且基本上基于 Inception V1 架构重新创建了 Inception V2 架构。 D2L 书，我唯一没有包括的是论文中描述的网格缩减。我希望这会改善我的模型的结果，但如果有的话，它会使它们变得更糟。我正在使用 GoogLeNet 章节中关于时尚_mnist 数据集的 D2L 代码测试模型，并使用 Inception V1 获得了 0.908 的测试准确度，但是在我的 V2 实现中，我只得到了 0.885 的测试 acc，这是我能做到的最高得到。我预计 V2 模型会更好，所以我真的不知道我哪里出错了，

这是前 3 个 inception 块的代码（来自论文中的图 5）

class InceptionB1(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(InceptionB1, self).__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Path 3 is a 1 x 1 convolutional layer followed by 2 3x3 convs (factorization)
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=3, padding=1)
        # Replacing path3_2 with modified path using factorization through smaller convolutions
        self.p3_3 = nn.Conv2d(c3[1], c3[1], kernel_size=3, padding = 1)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_3(self.p3_2(F.relu(self.p3_1(x)))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1, p2, p3, p4), dim=1)

这是我中间 5 个块的代码（论文中的图 6）

class InceptionB2(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(InceptionB2, self).__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 1 x 3 then 3x1 conv
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=(1,3), padding=1)
        self.p2_3 = nn.Conv2d(c2[1], c2[1], kernel_size=(3,1), padding=0)
        # Path 3 is a 1 x 1 conv layer followed by a 1x3 conv then 3x1 then 1x3 then 3x1
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=(1,3), padding=1)
        self.p3_3 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=0)
        self.p3_4 = nn.Conv2d(c3[1], c3[1], kernel_size=(1,3), padding=0)
        self.p3_5 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=1)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_3(F.relu(self.p2_2(F.relu(self.p2_1(x))))))
        p3 = F.relu(self.p3_5(F.relu(self.p3_4(F.relu(self.p3_3(F.relu(self.p3_2(F.relu(self.p3_1(x))))))))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1, p2, p3, p4), dim=1)

这是我最后 2 个块的代码（论文中的图 7）

class InceptionB3(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(InceptionB3, self).__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 1x3 and 3x1
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2_1 = nn.Conv2d(c2[0], c2[1], kernel_size=(1,3), padding=0)
        self.p2_2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=(3,1), padding=0)
        # Path 3 is a 1 x 1 convolutional layer followed by a 3x3 then 1x3 and 3x1
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=3, padding=1)
        self.p3_3_1 = nn.Conv2d(c3[1], c3[1], kernel_size=(1,3), padding=0)
        self.p3_3_2 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=0)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))

        p2_1 = F.relu(self.p2_2_1(F.relu(self.p2_1(x))))
        p2_2 = F.relu(elf.p2_2_2(F.relu(self.p2_1(x))))
        p2 = torch.matmul(p2_1, p2_2)

        p3_1 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p3_2_1 = F.relu(self.p3_3_1(p3_1))
        p3_2_2 = F.relu(sself.p3_3_2(p3_1))
        p3 = torch.matmul(p3_2_1, p3_2_2)

        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1, p2, p3, p4), dim=1)

这是 D2L 章节的链接 https://d2l.ai/chapter_convolutional-modern/googlenet.html

这是 v1 模型的训练图

这是 v2 模型的训练图

machine-learning - Inception V2 在 Inception V1 上没有改进

0 回答 0

Related

Reference