我有如下的CNNLstm模型。
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=3,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
#print(num_classes)
self.out = nn.Linear(32 * 75 * 75, num_classes)#32 * 75 * 75/64 * 37 * 37/128 * 18 * 18
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)
output = self.out(x)
return output, x
import torch
from torchvision import datasets, transforms
import torch.nn.functional as f
from torch_lr_finder import LRFinder
class CnnLstm(nn.Module):
def __init__(self):
super(CnnLstm, self).__init__()
self.cnn = CNN()
self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=3, batch_first=True)#stacked LSTM with 2 layers
self.linear = nn.Linear(256, num_classes)
def forward(self, x):
batch_size, time_steps, channels, height, width = x.size()
c_in = x.view(batch_size * time_steps, channels, height, width)
_, c_out = self.cnn(c_in)
r_in = c_out.view(batch_size, time_steps, -1)
r_out, (_, _) = self.rnn(r_in)
r_out2 = self.linear(r_out[:, -1, :])
return f.log_softmax(r_out2, dim=1)
class TrainCNNLSTM:
def __init__(self):
self.seed = 1
self.batch_size = 8
self.validate_batch_size = 8
self.test_batch_size = 1
self.epoch = 50
self.learning_rate = 0.005
self.step = 100
self.train_loader = None
self.validate_loader = None
self.test_loader = None
self.modelloaded = False
self.model = CnnLstm().to(device)
self.criterion = nn.CrossEntropyLoss()
#self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)#self.learning_rate = 0.001
self.optimizer = torch.optim.AdamW(self.model.parameters())
#self.scheduler = optim.lr_scheduler.OneCycleLR(self.optimizer, 2e-3, epochs=self.epoch , steps_per_epoch=len(train_loader))
def load_data(self):
data_loader = DataLoader()
self.train_loader = data_loader.get_train_data(self.batch_size)
self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
self.test_loader = data_loader.get_test_data(self.test_batch_size)
def do_lrfinder(self):
lr_finder = LRFinder(self.model, self.optimizer, self.criterion, device)
lr_finder.range_test(self.train_loader, end_lr=1, num_iter=1000)
lr_finder.plot()
plt.savefig("LRvsLoss.png")
plt.close()
def train(self):
for epoch in range(0, self.epoch):
t_losses=[]
for iteration, (data, target) in enumerate(self.train_loader):
print(data.shape)
data = np.expand_dims(data, axis=1)
print(data.shape)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
self.optimizer.zero_grad()
由于是 CNNLstm 模型,因此模型的数据输入形状为batch_size、time_steps、channels、height、width。
(8, 1, 3, 300, 300)
要使用torch_lr_finder
,我们需要运行以下代码。
lr_finder = LRFinder(self.model, self.optimizer, self.criterion, device)
lr_finder.range_test(self.train_loader, end_lr=1, num_iter=1000)
self.train_loader
输出形状为(8, 3, 300, 300)
. 所以在寻找学习率的过程中,self.model
不能使用。
我该如何使用torch_lr_finder
这种模型?