我使用 PyTorch API 和 Optuna 创建了以下机器学习模型类。
class MultiClassClassifer_Optuna_beta(nn.Module):
def __init__(self, trial, vocab_size, input_dim, output_dim, activation):
#Constructor
super(MultiClassClassifer_Optuna_beta, self).__init__()
self.layers = []
#embedding layer
padding_idx = TEXT.vocab['<pad>']
embedding_dim = trial.suggest_int("embedding_dimension", 25, 50)
self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
self.embedding.weight.requires_grad = True
#append embedding layer to layers list
self.layers.append(self.embedding)
#input layer
linear_hidden_units = trial.suggest_int("linear_input_dim", 8, 16) #hidden units of the input layer.
self.hiddenLayer1 = nn.Linear(input_dim*embedding_dim, linear_hidden_units) #output_dim, hidden_dim_2
self.batchnorm1 = nn.BatchNorm1d(linear_hidden_units)
#append input layer to layers list
self.layers.append(self.hiddenLayer1)
n_layers = trial.suggest_int("n_layers", 0, 2) #either 0, 1 or 2 extra hidden layers
for i in range(n_layers):
#output_dim = int(trial.suggest_loguniform("n_units_l{}".format(i), 4, 128))
out_features = trial.suggest_int("n_units_l{}".format(i), 4, 8)
self.layers.append(nn.Linear(linear_hidden_units, out_features))
self.layers.append(nn.ReLU())
linear_hidden_units = out_features
#output layer
self.output = nn.Linear(linear_hidden_units, output_dim)
#append output layer to layers list
self.layers.append(self.output)
#activation function
if activation == "ReLU":
self.activation = nn.ReLU()
else:
self.activation = nn.Tanh()
#append activation function to layers list
self.layers.append(self.activation)
#initialize weights of embedding layer
self.init_weights()
def init_weights(self):
initrange = 1.0
self.embedding.weight.data.uniform_(-initrange, initrange)
def forward(self, text):
#What adjustment to apply here to include number of layers?
embedded = self.embedding(text).view(text.size()[0], -1)
embedded = self.activation(embedded)
print("embedding",embedded.shape)
input_layer = self.hiddenLayer1(embedded)
input_layer = self.activation(input_layer)
print("input",input_layer.shape)
for layer in self.layers:
extra_hidden_layer = layer(input_layer.long())
print("extra_hidden_layer",extra_hidden_layer.shape)
#extra_hidden_layer = self.activation(extra_hidden_layer)
output_layer = self.output(extra_hidden_layer)
output_layer = self.activation(output_layer)
return output_layer
NN的默认结构是:
- 嵌入层(必须存在)
- 输入层(必须存在)
- 0、1、2隐藏层(可选,参数调整)
- 输出层(必须存在)
我想搜索是否应该存在额外的 1 或 2 个隐藏层,或者是否需要更多的隐藏层 (0)。
上面创建init()方法的代码是从这里得到启发的。创建forward()方法的代码是从这里得到启发的。
更新
但是,当我从 API 运行 optuna 试验时,我得到以下异常的回报:
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=10), pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)
RuntimeError: mat1 和 mat2 形状不能相乘(64x14 和 3840x14)
NN模型层的打印语句:
embedding torch.Size([64, 3840])
input torch.Size([64, 14])
extra_hidden_layer torch.Size([64, 14, 30])
我怎样才能解决这个问题?
这是我正在使用的 colab 笔记本。