Problem description
So I have model classes written in Python using PyTorch:
from abc import ABC

import torch
import torch.nn as nn


class ResBlock(nn.Module, ABC):
    def __init__(self, in_channels: int, filters: int, conv_num: int):
        super(ResBlock, self).__init__()
        self.filters = filters
        self.conv_num = conv_num
        self.input_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=1)
        self.inner_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=3, padding=1)
        self.outer_conv = torch.nn.Conv1d(in_channels=filters, out_channels=filters, kernel_size=3, padding=1)
        self.max_pool = torch.nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        y = x
        for i in range(self.conv_num - 1):
            if i == 0:
                y = self.inner_conv(y)
            else:
                y = self.outer_conv(y)
            y = torch.relu(y)
        y = self.outer_conv(y)
        s = self.input_conv(x)
        y = s + y
        y = torch.relu(y)
        return self.max_pool(y)
class Net(nn.Module, ABC):
    def __init__(self, num_of_classes: int):
        super(Net, self).__init__()
        self.block_1 = ResBlock(1, 16, 2)
        self.block_2 = ResBlock(16, 32, 2)
        self.block_3 = ResBlock(32, 64, 3)
        self.block_4 = ResBlock(64, 128, 3)
        self.block_5 = ResBlock(128, 128, 3)
        self.avg_pool = torch.nn.AvgPool1d(kernel_size=3, stride=3)
        self.flatten = torch.nn.Flatten()
        self.dense_1 = torch.nn.Linear(
            in_features=self.block_5.filters * (249 // self.avg_pool.kernel_size[0]),
            out_features=256
        )
        self.dense_2 = torch.nn.Linear(in_features=256, out_features=128)
        self.classifier = torch.nn.Linear(in_features=128, out_features=num_of_classes)

    def forward(self, x):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.block_5(x)
        x = self.avg_pool(x)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.classifier(x)
        return x
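For reference, a minimal shape sanity check for this architecture (the input length 7968 is a hypothetical value chosen so that five stride-2 max pools yield the hard-coded length 249 that dense_1 expects):

import torch

net = Net(num_of_classes=10)        # 10 classes is an arbitrary placeholder
dummy = torch.ones(4, 1, 7968)      # (batch, channels, length); 7968 // 2**5 == 249

with torch.no_grad():
    out = net(dummy)

print(out.shape)                    # expected: torch.Size([4, 10])
# After avg_pool the length is 249 // 3 == 83, so flatten yields
# 128 * 83 == 10624 features, matching dense_1's in_features.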
It works perfectly well when used from Python. I trained it, reached ~65% accuracy on the test set, and wanted to export it with TorchScript and then load it from a C++ application.
Here is the export code:
# Training code skipped for simplification...
# Here the model is actually trained, weights are updated and so on
jit_model = torch.jit.script(model)
jit_model.save('torchscript-model.pt')
Before importing it into C++, I checked that the model was exported correctly by loading it back in a Python script with torch.jit.load
and evaluating it again on my test dataset, which gave the same ~65% accuracy as expected.
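That Python-side check looked roughly like this (a minimal sketch; CsvDataset and constants are the same helpers used in the training code further below, the exact verification script is not reproduced here):

import os
import torch

loaded = torch.jit.load('torchscript-model.pt')
loaded.eval()

test_dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'test.csv'))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=constants.BATCH_SIZE)

correct = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.unsqueeze(1)    # (batch, 1, length), same as in training
        labels = labels.squeeze(1)
        outputs = loaded(inputs)
        correct += (outputs.argmax(1) == labels).sum().item()

print(correct / len(test_dataset))      # ~0.65, matching the eager model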
So the next logical step was to load the model in a C++ program and evaluate it on the same data. Here is the code:
#include <torch/torch.h>
#include <torch/script.h>
#include "constants.hh"

int main() {
    torch::manual_seed(1);
    torch::Device device(torch::kCPU);

    auto model = torch::jit::load("./torchscript-model.pt");
    model.to(device);

    // CsvDataset is a custom torch::data dataset defined elsewhere in the project.
    auto test_raw_dataset = CsvDataset(constants::kTestCsv);
    const auto test_set_size = test_raw_dataset.size().value();
    auto test_dataset = test_raw_dataset.map(torch::data::transforms::Stack<>());
    auto test_data_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(
        std::move(test_dataset), torch::data::DataLoaderOptions(constants::kBatchSize));

    size_t correct_count = 0;
    for (const auto& batch : *test_data_loader) {
        auto inputs = batch.data.to(device);
        auto labels = batch.target.to(device);
        inputs = inputs.unsqueeze(1);
        labels = labels.squeeze(1);

        auto outputs = model.forward({inputs}).toTensor();
        auto prediction = outputs.argmax(1);
        correct_count += prediction.eq(labels).sum().item<int64_t>();
    }
    auto accuracy = static_cast<double>(correct_count) / test_set_size;
    // Rest of code removed for clarity...
}
The actual problem
What happens, however, is that the accuracy computed in C++ is ~12%. Why?
What I have tried/found so far:
- All predictions when the model is loaded in C++ are identical (equal to 6). When the model is loaded in Python, everything is fine.
- All computations after obtaining outputs are identical in Python and C++, so the problem is not in how I compute the accuracy.
- Using torch::manual_seed does nothing. (It shouldn't change anything, but why not try...) torch::NoGradGuard does nothing either.
- I also made sure the test data is fed to the model in the same order in Python and C++.
- In both cases the model runs in eval mode.
Update 1
Checking the loaded model on a static torch.ones input in Python and C++ still produces different results.
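On the Python side that check was essentially the following (a sketch; the length 7968 is the same hypothetical value as in the shape check above, and the C++ program feeds an equivalent torch::ones tensor of the same shape):

import torch

loaded = torch.jit.load('torchscript-model.pt')
loaded.eval()

with torch.no_grad():
    # The C++ side runs the same saved module on an all-ones tensor of the
    # same shape; the printed outputs differ between the two runs.
    print(loaded(torch.ones(1, 1, 7968)))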
Update 2 (as requested by @gspr): the Python training code:
dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'train.csv'))
sampler = torch.utils.data.sampler.RandomSampler(dataset)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=constants.BATCH_SIZE, sampler=sampler)
train_test_size = len(dataset)
batch_count = math.ceil(train_test_size / constants.BATCH_SIZE)

test_dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'test.csv'))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=constants.BATCH_SIZE)
test_set_size = len(test_dataset)

model = Net(dataset.num_classes())
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=constants.LEARNING_RATE)

for epoch in range(constants.EPOCHS):
    running_loss = 0.0
    correct_count = 0
    for i, data in enumerate(dataloader, 1):
        inputs, labels = data
        inputs = inputs.unsqueeze(1)
        labels = labels.squeeze(1)

        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        prediction = outputs.argmax(1)
        correct_count += sum(prediction.eq(labels)).item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    test_loss = 0.0
    test_correct = 0.0
    with torch.no_grad():
        for i, data in enumerate(test_dataloader, 1):
            inputs, labels = data
            inputs = inputs.unsqueeze(1)
            labels = labels.squeeze(1)

            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            prediction = outputs.argmax(1)
            test_correct += sum(prediction.eq(labels)).item()

    test_sample_loss = test_loss / test_set_size
    test_accuracy = test_correct / test_set_size

script_module = torch.jit.script(model)
torch.jit.save(script_module, 'traced_model.torch')
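For what it's worth, a sanity check one could append to this script right before saving, comparing the eager model and the scripted module on a single test batch (a sketch, not part of the original training script):

# Hypothetical addition (not in the original script): the scripted module
# should reproduce the eager model's outputs exactly on the same batch.
with torch.no_grad():
    sample_inputs, _ = next(iter(test_dataloader))
    sample_inputs = sample_inputs.unsqueeze(1)
    assert torch.allclose(model(sample_inputs), script_module(sample_inputs))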