我正在尝试通过扩展 nn.Module 来使用自定义损失函数,但我无法克服错误
变量的元素 0 不需要 grad 并且没有 grad_fn
注意:我的标签是大小列表:num_samples,但每个批次在整个批次中都具有相同的标签,因此我们通过调用将整个批次的标签缩小为单个标签.diag()
我的代码如下,基于迁移学习教程:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for data in dataloaders[phase]:
# get the inputs
inputs, labels = data
inputs = inputs.float()
# wrap them in Variable
if use_gpu:
inputs = Variable(inputs.cuda())
labels = Variable(labels.cuda())
else:
inputs = Variable(inputs)
labels = Variable(labels)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs)
#outputs = nn.functional.sigmoid(outputs).round()
_, preds = torch.max(outputs, 1)
label = labels.diag().float()
preds = preds.float()
loss = criterion(preds, label)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.data[0] * inputs.size(0)
running_corrects += torch.sum(pred == label.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
我的损失函数定义如下:
class CustLoss(nn.Module):
def __init__(self):
super(CustLoss, self).__init__()
def forward(self, outputs, labels):
return cust_loss(outputs, labels)
def cust_loss(pred, targets):
'''preds are arrays of size classes with floats in them'''
'''targets are arrays of all the classes from the batch'''
'''we sum the classes from the batch and find the num correct'''
r = torch.sum(pred == targets)
return r
然后我运行以下命令来运行模型:
model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 3)
if use_gpu:
model_ft = model_ft.cuda()
criterion = CustLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=25)
我试图让它与其他损失函数一起使用,但无济于事。loss.backward()
被调用时我总是得到同样的错误。
我的理解是,loss.backward
如果我扩展nn.Module
.