
I am using GPyTorch to run a deep Gaussian process regression model, and I am optimizing the hyperparameters with the Adam optimizer.

Here is how the synthetic dataset I am using is created:

import math
import torch

train_x = []
train_y = []

train_x1 = torch.linspace(0, 0.2, 100)
train_x1=torch.FloatTensor(train_x1)
train_y1 = torch.sin(train_x1 * (2 * math.pi)) + torch.randn(train_x1.size()) * math.sqrt(0.04)

train_x2 = torch.linspace(0.35, 0.5, 3400)
train_x2=torch.FloatTensor(train_x2)
train_y2 = train_x2 * (2 * math.pi) + torch.randn(train_x2.size()) * math.sqrt(0.005)

train_x3 = torch.linspace(0.7, 1, 2500)
train_x3=torch.FloatTensor(train_x3)
train_y3 = train_x3 * train_x3 + torch.randn(train_x3.size()) * math.sqrt(0.1)

train_x.extend(train_x1)
train_x.extend(train_x2)
train_x.extend(train_x3)

train_x=torch.FloatTensor(train_x)

train_y.extend(train_y1)
train_y.extend(train_y2)
train_y.extend(train_y3)

train_y=torch.FloatTensor(train_y)
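
For reference, here is a more compact way I considered building the same tensors (not the code I am actually running): torch.linspace already returns float32, so the FloatTensor conversions are redundant, and torch.cat avoids the intermediate Python lists. Since the model definition below indexes train_x_shape[-1] and the ELBO uses train_x.shape[-2], I believe train_x needs to be a 2-D (n, d) tensor, hence the unsqueeze:

train_x = torch.cat([train_x1, train_x2, train_x3]).unsqueeze(-1)  # shape (6000, 1)
train_y = torch.cat([train_y1, train_y2, train_y3])                # shape (6000,)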

When I run the following lines to print out the hyperparameters, I get an error: "only one element tensors can be converted to Python scalars".

for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.item()}')
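
From the error message I assume that some of the parameters (for example the inducing points or the variational covariance) have more than one element, so .item() cannot be called on them. A guarded sketch like the one below avoids the call on non-scalar parameters, but I am not sure it is the right way to inspect the hyperparameters:

for param_name, param in model.named_parameters():
    if param.numel() == 1:
        print(f'Parameter name: {param_name:42} value = {param.item()}')
    else:
        print(f'Parameter name: {param_name:42} shape = {tuple(param.shape)}')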

Does anyone know how I can fix this? Here is the definition of my deep GP regression model:

import torch
import gpytorch
import tqdm.notebook
from torch.utils.data import TensorDataset, DataLoader
from gpytorch.means import ConstantMean, LinearMean
from gpytorch.kernels import RBFKernel, ScaleKernel
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy
from gpytorch.distributions import MultivariateNormal
from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
from gpytorch.mlls import VariationalELBO, DeepApproximateMLL
from gpytorch.likelihoods import GaussianLikelihood

# Define an example deep GP hidden layer. Instead of extending ApproximateGP, we extend
# DeepGPLayer. A DeepGPLayer needs a number of input dimensions, a number of output
# dimensions, and a number of samples. This is kind of like a linear layer in a standard
# neural network: input_dims defines how many inputs this hidden layer expects, and
# output_dims defines how many hidden GPs to create outputs for.
# In this particular example, we make a particularly fancy DeepGPLayer that has
# "skip connections" with previous layers, similar to a ResNet.

class ToyDeepGPHiddenLayer(DeepGPLayer):
    def __init__(self, input_dims, output_dims, num_inducing=128, mean_type='constant'):   #num_inducing=128(before)
        if output_dims is None:
            inducing_points = torch.randn(num_inducing, input_dims)
            batch_shape = torch.Size([])
        else:
            inducing_points = torch.randn(output_dims, num_inducing, input_dims)
            batch_shape = torch.Size([output_dims])

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing,
            batch_shape=batch_shape
        )

        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True
        )

        super(ToyDeepGPHiddenLayer, self).__init__(variational_strategy, input_dims, output_dims)

        if mean_type == 'constant':
            self.mean_module = ConstantMean(batch_shape=batch_shape)
        else:
            self.mean_module = LinearMean(input_dims)
            #Scalekernel???
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
            batch_shape=batch_shape, ard_num_dims=None
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

    def __call__(self, x, *other_inputs, **kwargs):
        """
        Overriding __call__ isn't strictly necessary, but it lets us add concatenation based skip connections
        easily. For example, hidden_layer2(hidden_layer1_outputs, inputs) will pass the concatenation of the first
        hidden layer's outputs and the input data to hidden_layer2.
        """
        if len(other_inputs):
            if isinstance(x, gpytorch.distributions.MultitaskMultivariateNormal):
                x = x.rsample()

            processed_inputs = [
                inp.unsqueeze(0).expand(self.num_samples, *inp.shape)
                for inp in other_inputs
            ]

            x = torch.cat([x] + processed_inputs, dim=-1)

        return super().__call__(x, are_samples=bool(len(other_inputs)))
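
# In case it is useful: a small standalone check of what a single hidden layer returns
# (a sketch with made-up names; the printed shape is what I expect, not something I have
# verified carefully).
_check_layer = ToyDeepGPHiddenLayer(input_dims=1, output_dims=2, mean_type='linear')
with gpytorch.settings.num_likelihood_samples(10):
    _check_out = _check_layer(torch.randn(50, 1))  # deterministic inputs get expanded over the sample dimension
    print(_check_out.mean.shape)                   # I expect something like torch.Size([10, 50, 2])
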
#Building the deep GP

num_output_dims = 2

class DeepGP(DeepGP):
    def __init__(self, train_x_shape):
        hidden_layer_1 = ToyDeepGPHiddenLayer(
            input_dims=train_x_shape[-1],
            output_dims=num_output_dims,
            mean_type='linear',
        )
        
        hidden_layer_2 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_1.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        
        hidden_layer_3 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_2.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_4 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_3.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        last_layer = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_4.output_dims,
            output_dims=None,
            mean_type='constant',
        )

        super().__init__()

        self.hidden_layer_1 = hidden_layer_1
        self.hidden_layer_2 = hidden_layer_2
        self.hidden_layer_3 = hidden_layer_3
        self.hidden_layer_4 = hidden_layer_4
        self.last_layer = last_layer
        
#         self. likelihood = gpytorch.likelihoods.GaussianLikelihood(noise_constraint=gpytorch.constraints.GreaterThan(1e-11))
#         self.likelihood.register_prior("noise_prior", gpytorch.priors.HorseshoePrior(0.2), "noise")
#         self.likelihood.noise = 1e-2
        self.likelihood = GaussianLikelihood()

    def forward(self, inputs):
        hidden_rep1 = self.hidden_layer_1(inputs)
        hidden_rep2 = self.hidden_layer_2(hidden_rep1)
        hidden_rep3 = self.hidden_layer_3(hidden_rep2)
        hidden_rep4 = self.hidden_layer_4(hidden_rep3)
        output = self.last_layer(hidden_rep4)
        return output

    def predict(self, test_loader):
        with torch.no_grad():
            mus = []
            variances = []
            lls = []
            for x_batch, y_batch in test_loader:
                preds = self.likelihood(self(x_batch))
                mus.append(preds.mean)
                variances.append(preds.variance)
                lls.append(self.likelihood.log_marginal(y_batch, self(x_batch)))
                print(x_batch)
                print(mus)

        return torch.cat(mus, dim=-1), torch.cat(variances, dim=-1), torch.cat(lls, dim=-1)

    
model = DeepGP(train_x.shape)
if torch.cuda.is_available():
    model = model.cuda()

Training code:

#training the data
num_epochs = 50
num_samples = 3 if smoke_test else 10
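# smoke_test is assumed to be defined earlier, e.g. smoke_test = ('CI' in os.environ) as in the GPyTorch tutorials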


optimizer = torch.optim.Adam([
    {'params': model.parameters()},
], lr=0.1)
mll = DeepApproximateMLL(VariationalELBO(model.likelihood, model, train_x.shape[-2]))
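
# train_loader is not shown above; this is the minimal construction I assume here
# (TensorDataset / DataLoader as imported above; the batch size is just an example).
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)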

epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each iteration, we will go over each minibatch of data
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        with gpytorch.settings.num_likelihood_samples(num_samples):
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            loss.backward()
            optimizer.step()
            minibatch_iter.set_postfix(loss=loss.item())

Testing code:

import gpytorch
import math
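# test_x and test_y are held-out tensors built the same way as train_x / train_y
# (their construction is not shown here).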


test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=1024)
print(test_x.shape)

model.eval()
predictive_means, predictive_variances, test_lls = model.predict(test_loader)

rmse = torch.mean(torch.pow(predictive_means.mean(0) - test_y, 2)).sqrt()
print(f"RMSE: {rmse.item()}, NLL: {-test_lls.mean().item()}")

Many thanks in advance.
