python - PyTorch minibatch 训练非常慢

Question

在成人收入数据集上训练我的模型并使用小批量训练时，无论我使用 PyTorch 的 DataLoader 还是小批量训练的基本实现，训练都非常慢。我的代码有问题，还是有其他方法可以加快成人收入数据集的训练速度？我想使用 one-hot 编码和交叉熵损失 + softmax。我是否必须使用不同的损失函数或删除 softmax 层？

import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings

warnings.filterwarnings('ignore')
device = torch.device("cpu")


class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = F.sigmoid(self.layer1(x))
        x = F.softmax(self.layer2(x))  # To check with the loss function
        return x


# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))


def binary_cross_entropy_one_hot(input, target):
    return torch.nn.CrossEntropyLoss()(input, target)


def _accuracy(y_pred, y_true):
    classes = torch.argmax(y_pred, dim=1)
    labels = y_true
    accuracy = torch.mean((classes == labels).float())
    return accuracy


model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True

# training loop
train_loss = []
for epoch in range(epochs):
    if minibatch:
        batch_size = 128  # or whatever
        permutation = torch.randperm(X_tr.size()[0])
        for i in range(0, X_tr.size()[0], batch_size):
            optimizer.zero_grad()
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_tr[indices], y_tr[indices]
            # in case you wanted a semi-full example
            outputs = model.forward(batch_x)
            loss = binary_cross_entropy_one_hot(outputs, batch_y)
            loss.backward()
            optimizer.step()
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2}  loss: {loss:10.8f}')
        # train_ds = TensorDataset(X_tr, y_tr)
        # train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
        # batch_loss = 0.0
        # batch_accuracy = 0.0
        # for nb, (x_batch, y_batch) in enumerate(train_dl):  # manually set number of batches?
        #     optimizer.zero_grad()
        #     y_pred_train = model(x_batch)
        #     loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
        #     loss.backward()
        #     optimizer.step()
        #     batch_loss += loss.item()
        #     batch_accuracy += _accuracy(y_pred_train, y_batch)
        # train_loss.append(batch_loss / (nb + 1))
        # accuracy = batch_accuracy / (nb + 1)
        # if epoch % 100 == 0:
        #     print(f'epoch: {epoch:2}  loss: {train_loss[epoch]:10.8f}')
    else:
        optimizer.zero_grad()
        y_pred = model(X_tr)
        # computing the loss function
        loss = binary_cross_entropy_one_hot(y_pred, y_tr)
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2}  loss: {loss.item():10.8f}')
        loss.backward()
        optimizer.step()
        accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
    model.eval()
    y_pred = model(X_te)
    test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
    test_acc = _accuracy(y_pred, y_te)
print("Loss on test data: {:.4}".format(test_loss))
print("Accuracy on test data: {:.4}".format(test_acc))

score 0 · Accepted Answer

时间取决于您input_dim的、数据集的大小以及每个时期的更新次数（// 批量大小）。根据您与我们分享的内容，我不确定问题是什么，以及是否真的存在任何瓶颈。但是，我要指出以下几点，它们可能会对您有所帮助（排名不分先后）：

无需用torch.autograd.Variable. 它已被弃用且不再需要，Autograd自动支持torch.tensorsset requires_gradto True。
如果您正在使用torch.nn.CrossEntropyLoss，则不应F.softmax在模型的输出上使用。那是因为CrossEntropyLoss包括nn.LogSoftmax()和nn.NLLLoss()。也不需要每次调用它时都初始化模块：
```
criterion = torch.nn.CrossEntropyLoss()
def binary_cross_entropy_one_hot(input, target):
    return criterion(input, target)
```

我看到您正在重新定义每个时代的数据加载器。那是你真正想要的吗？如果不是，您可以在训练循环之外定义它：

train_ds = TensorDataset(X_tr, y_tr)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)

for epoch in range(epochs):
    for x, y in train_dl:
        # ...

我会呼吁.item()您的准确性（在调用时_accuracy）不要将其附加到计算图并在准备好时将其从内存中释放。

python - PyTorch minibatch 训练非常慢

1 回答 1

Related

Reference