python - 在 LSTM 网络中使用 Adam 优化器与使用 LBFGS 的对比

Question

我修改了 PyTorch 官方的 LSTM 教程（正弦波预测：给定 [0:N] 的正弦值，预测 [N:2N] 的值），把其中的 LBFGS 优化器换成了 Adam 优化器。但是，修改后的模型训练效果不好，无法正确预测正弦波。由于大多数情况下我们都使用 Adam 优化器来训练 RNN，我想知道如何解决这个问题。另外，我还想知道“序列输入、序列输出”这部分代码（目前通过循环完成：for input_t in input.split(1, dim=1)）是否可以用 PyTorch 现成的模块或函数来实现。

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt

class Sequence(nn.Module):
    """Two stacked LSTM cells followed by a linear read-out, applied step by step.

    The network consumes one scalar per time step and emits one scalar per
    time step. After the given sequence is exhausted it can keep running on
    its own predictions to extrapolate ``future`` further steps.

    Args:
        hidden_size: Width of both LSTM cells. Defaults to 51, the value
            that was previously hard-coded.
    """

    def __init__(self, hidden_size=51):
        super(Sequence, self).__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, input, future=0):
        """Run the network over ``input`` and optionally predict further steps.

        Args:
            input: Tensor of shape (batch, seq_len); it is split into
                (batch, 1) slices along dim 1, one slice per time step.
            future: Number of extra steps to generate by feeding each
                prediction back in as the next input.

        Returns:
            Tensor of shape (batch, seq_len + future) holding the per-step
            outputs concatenated along dim 1.
        """
        batch_size = input.size(0)
        state_shape = (batch_size, self.hidden_size)
        # Allocate the initial states from the input tensor so they share its
        # dtype and device; a hard-coded torch.double state breaks any model
        # that was not converted with .double() or that lives off the CPU.
        h_t = input.new_zeros(state_shape)
        c_t = input.new_zeros(state_shape)
        h_t2 = input.new_zeros(state_shape)
        c_t2 = input.new_zeros(state_shape)

        outputs = []
        # Teacher-forced pass over the provided sequence.
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Free-running pass: the last prediction becomes the next input.
        for _ in range(future):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        return torch.cat(outputs, dim=1)


if __name__ == '__main__':
    # Fix both seeds so that runs are reproducible.
    np.random.seed(0)
    torch.manual_seed(0)

    # Load the data and build the train/test split: the first 3 rows are held
    # out for testing, the remaining rows are used for training. Targets are
    # the inputs shifted one step ahead along the time axis.
    # NOTE(review): torch.load unpickles the file, so only load trusted data.
    # The from_numpy calls below imply the file holds a NumPy array, which
    # recent PyTorch releases may refuse under the weights_only default --
    # confirm against the installed version.
    data = torch.load('traindata.pt')
    input = torch.from_numpy(data[3:, :-1])
    target = torch.from_numpy(data[3:, 1:])
    test_input = torch.from_numpy(data[:3, :-1])
    test_target = torch.from_numpy(data[:3, 1:])
    print("input.size", input.size())
    print("target.size", target.size())

    # Build the model in double precision.
    seq = Sequence()
    seq.double()
    criterion = nn.MSELoss()
    # Adam takes one gradient step per call to step(); unlike LBFGS it does
    # not need a closure that re-evaluates the loss.
    optimizer = optim.Adam(seq.parameters(), lr=0.005)

    num_steps = 15   # number of full-batch training steps
    future = 1000    # number of steps to extrapolate beyond the test input
    seq_len = input.size(1)

    def draw(yi, color):
        """Plot one series on the current figure: solid for the span covered
        by the input, dotted for the extrapolated steps."""
        plt.plot(np.arange(seq_len), yi[:seq_len], color, linewidth=2.0)
        plt.plot(np.arange(seq_len, seq_len + future), yi[seq_len:],
                 color + ':', linewidth=2.0)

    # Begin to train.
    for i in range(num_steps):
        print('STEP: ', i)
        seq.train()
        optimizer.zero_grad()
        out = seq(input)
        loss = criterion(out, target)
        print('train loss:', loss.item())
        loss.backward()
        optimizer.step()

        # Predict on the held-out rows; no gradients are needed here.
        seq.eval()
        with torch.no_grad():
            pred = seq(test_input, future=future)
            # Only the non-extrapolated part has a ground truth to compare to.
            loss = criterion(pred[:, :-future], test_target)
            print('test  loss:', loss.item())
            y = pred.detach().numpy()

        # Draw the result once, after the final training step.
        if i == num_steps - 1:
            plt.figure(figsize=(30, 10))
            plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
            plt.xlabel('x', fontsize=20)
            plt.ylabel('y', fontsize=20)
            plt.xticks(fontsize=20)
            plt.yticks(fontsize=20)
            draw(y[0], 'r')
            draw(y[1], 'g')
            draw(y[2], 'b')
            # Save before show(): once show() returns the figure may already
            # be closed, and savefig would then write an empty page.
            plt.savefig('predict_LSTM%d.pdf' % i)
            plt.show()
            plt.close()

score 1 · Accepted Answer

我刚刚运行了您的代码和原始代码。我认为问题在于您用 Adam 训练的步数不够多。可以看到，在第 15 步时您的训练损失仍在不断下降。所以我把训练步数从 15 改成了 45，下面是第 40 步之后生成的图像：

原始代码在第 4 步之后达到 4e-05 loss。但在那之后，损失不知何故爆炸了。您使用 ADAM 的代码可以减少所有 45 个步骤的损失，但最终损失约为 0.001。我希望我能正确运行这两个程序。

哦，关于你的第二个问题。

也想知道关于sequence-in-sequence-out的代码段

是的，您可以编写一个函数或定义一个带有两个 LSTM 的模块来执行此操作。但这没有意义，因为您的网络仅包含两个 LSTM。毕竟，您必须在某个时候进行这种“布线”工作。

如果您的网络包含多个这样的块，您可以编写一个包含两个 LSTM 的模块，并把它当作基本模块来使用，例如 self.BigLSTM = BigLSTM(...)，就像您定义 self.lstm1 = nn.LSTMCell(...) 那样。

python - 在 LSTM 网络中使用 Adam 优化器与使用 LBFGS 的对比

1 回答 1

Related

Reference