I'm using GPyTorch to run a deep Gaussian process regression model, and I'm optimizing the hyperparameters with the Adam optimizer.
Here is how I create the synthetic dataset I'm using:
train_x = []
train_y = []
train_x1 = torch.linspace(0, 0.2, 100)
train_x1 = torch.FloatTensor(train_x1)
train_y1 = torch.sin(train_x1 * (2 * math.pi)) + torch.randn(train_x1.size()) * math.sqrt(0.04)
train_x2 = torch.linspace(0.35, 0.5, 3400)
train_x2 = torch.FloatTensor(train_x2)
train_y2 = train_x2 * (2 * math.pi) + torch.randn(train_x2.size()) * math.sqrt(0.005)
train_x3 = torch.linspace(0.7, 1, 2500)
train_x3 = torch.FloatTensor(train_x3)
train_y3 = train_x3 * train_x3 + torch.randn(train_x3.size()) * math.sqrt(0.1)
train_x.extend(train_x1)
train_x.extend(train_x2)
train_x.extend(train_x3)
train_x = torch.FloatTensor(train_x)
train_y.extend(train_y1)
train_y.extend(train_y2)
train_y.extend(train_y3)
train_y = torch.FloatTensor(train_y)
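The train_loader used in the training loop below isn't shown above; here is a minimal sketch of how it could be built (an assumption on my part, including the unsqueeze(-1) that turns the 1-D inputs into the (N, 1) shape the GP layers and train_x.shape[-2] below expect):

from torch.utils.data import TensorDataset, DataLoader

# Assumed: reshape to (N, 1) so train_x.shape[-1] is the number of input
# dimensions and train_x.shape[-2] is the number of training points.
train_x = train_x.unsqueeze(-1)
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)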
When I run these lines to inspect the hyperparameters, I get the error "only one element tensors can be converted to Python scalars":
for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.item()}')
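The immediate cause is that .item() only works on one-element tensors, while many deep GP parameters (inducing points, variational covariance factors, ARD lengthscales) are multi-element. A minimal workaround sketch, only calling .item() on true scalars (the formatting choices here are my own):

for param_name, param in model.named_parameters():
    if param.numel() == 1:
        # A genuine scalar hyperparameter: .item() is safe.
        print(f'Parameter name: {param_name:42} value = {param.item()}')
    else:
        # Multi-element parameters can't become a single Python scalar;
        # print the shape instead (or param.detach().tolist() for the values).
        print(f'Parameter name: {param_name:42} shape = {tuple(param.shape)}')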
Does anyone know how I can fix this? Here is the definition of my deep GP regression model:
# Define an example deep GP hidden layer.
# Instead of extending ApproximateGP, we extend DeepGPLayer.
# DeepGPLayers need a number of input dimensions, a number of output dimensions, and a number of samples.
# This is kind of like a linear layer in a standard neural network: input_dims defines how many inputs this
# hidden layer will expect, and output_dims defines how many hidden GPs to create outputs for.
# In this particular example, we make a particularly fancy DeepGPLayer that has "skip connections" with
# previous layers, similar to a ResNet.
class ToyDeepGPHiddenLayer(DeepGPLayer):
    def __init__(self, input_dims, output_dims, num_inducing=128, mean_type='constant'):
        if output_dims is None:
            inducing_points = torch.randn(num_inducing, input_dims)
            batch_shape = torch.Size([])
        else:
            inducing_points = torch.randn(output_dims, num_inducing, input_dims)
            batch_shape = torch.Size([output_dims])

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing,
            batch_shape=batch_shape
        )
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True
        )

        super(ToyDeepGPHiddenLayer, self).__init__(variational_strategy, input_dims, output_dims)

        if mean_type == 'constant':
            self.mean_module = ConstantMean(batch_shape=batch_shape)
        else:
            self.mean_module = LinearMean(input_dims)
        # ScaleKernel wrapping an RBF kernel with ARD over the input dimensions
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
            batch_shape=batch_shape, ard_num_dims=None
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

    def __call__(self, x, *other_inputs, **kwargs):
        """
        Overriding __call__ isn't strictly necessary, but it lets us add concatenation-based skip connections
        easily. For example, hidden_layer2(hidden_layer1_outputs, inputs) will pass the concatenation of the first
        hidden layer's outputs and the input data to hidden_layer2.
        """
        if len(other_inputs):
            if isinstance(x, gpytorch.distributions.MultitaskMultivariateNormal):
                x = x.rsample()

            processed_inputs = [
                inp.unsqueeze(0).expand(self.num_samples, *inp.shape)
                for inp in other_inputs
            ]

            x = torch.cat([x] + processed_inputs, dim=-1)

        return super().__call__(x, are_samples=bool(len(other_inputs)))
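For reference, the skip-connection pattern the docstring above describes would look like this inside a model's forward (a hypothetical sketch; the forward of my model below does not actually use it):

# Hypothetical: hidden_layer_2 receives the concatenation of the first
# layer's (sampled) outputs and the raw inputs along the feature dimension.
hidden_rep1 = self.hidden_layer_1(inputs)
hidden_rep2 = self.hidden_layer_2(hidden_rep1, inputs)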
# Building the deep GP
num_output_dims = 2

class DeepGP(DeepGP):
    def __init__(self, train_x_shape):
        hidden_layer_1 = ToyDeepGPHiddenLayer(
            input_dims=train_x_shape[-1],
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_2 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_1.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_3 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_2.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_4 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_3.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        last_layer = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_4.output_dims,
            output_dims=None,
            mean_type='constant',
        )

        super().__init__()

        self.hidden_layer_1 = hidden_layer_1
        self.hidden_layer_2 = hidden_layer_2
        self.hidden_layer_3 = hidden_layer_3
        self.hidden_layer_4 = hidden_layer_4
        self.last_layer = last_layer
        # self.likelihood = gpytorch.likelihoods.GaussianLikelihood(noise_constraint=gpytorch.constraints.GreaterThan(1e-11))
        # self.likelihood.register_prior("noise_prior", gpytorch.priors.HorseshoePrior(0.2), "noise")
        # self.likelihood.noise = 1e-2
        self.likelihood = GaussianLikelihood()
    def forward(self, inputs):
        hidden_rep1 = self.hidden_layer_1(inputs)
        hidden_rep2 = self.hidden_layer_2(hidden_rep1)
        hidden_rep3 = self.hidden_layer_3(hidden_rep2)
        hidden_rep4 = self.hidden_layer_4(hidden_rep3)
        output = self.last_layer(hidden_rep4)
        return output

    def predict(self, test_loader):
        with torch.no_grad():
            mus = []
            variances = []
            lls = []
            for x_batch, y_batch in test_loader:
                preds = self.likelihood(self(x_batch))
                mus.append(preds.mean)
                variances.append(preds.variance)
                lls.append(self.likelihood.log_marginal(y_batch, self(x_batch)))
                print(x_batch)
                print(mus)
        return torch.cat(mus, dim=-1), torch.cat(variances, dim=-1), torch.cat(lls, dim=-1)
model = DeepGP(train_x.shape)
if torch.cuda.is_available():
    model = model.cuda()
Training code:
# Training the model
num_epochs = 50
num_samples = 3 if smoke_test else 10  # assumes smoke_test is defined earlier, e.g. smoke_test = ('CI' in os.environ)

optimizer = torch.optim.Adam([
    {'params': model.parameters()},
], lr=0.1)
mll = DeepApproximateMLL(VariationalELBO(model.likelihood, model, train_x.shape[-2]))

epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each epoch, we go over each minibatch of data
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        with gpytorch.settings.num_likelihood_samples(num_samples):
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            loss.backward()
            optimizer.step()
            minibatch_iter.set_postfix(loss=loss.item())
Test code:
import gpytorch
import math

test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=1024)
print(test_x.shape)

model.eval()
predictive_means, predictive_variances, test_lls = model.predict(test_loader)

rmse = torch.mean(torch.pow(predictive_means.mean(0) - test_y, 2)).sqrt()
print(f"RMSE: {rmse.item()}, NLL: {-test_lls.mean().item()}")
Many thanks in advance.