我是初学者,英语也不是我的母语,所以我提的问题可能不够好,敬请见谅!
我最近完成了官方的 AllenNLP 教程(https://guide.allennlp.org/training-and-prediction),想将简单分类器的词嵌入更改为 ELMo。
另外,我想让简单分类器的架构更复杂,以提高其准确性。我想我已经完成了模型的实现。
simple_classifier.py
@Model.register("simple_classifier")
class SimpleClassifier(Model):
    """Text classifier: embed tokens, pool them to one vector, then apply an MLP.

    The pooled encoding is passed through four ReLU-activated linear layers
    (encoder output dim -> 512 -> 128 -> 50 -> 10) followed by a final linear
    layer that produces one logit per entry of the ``"labels"`` vocabulary
    namespace.
    """

    def __init__(
        self, vocab: Vocabulary, embedder: TextFieldEmbedder, encoder: Seq2VecEncoder
    ):
        """Build the embedder/encoder pipeline and the MLP classification head.

        Args:
            vocab: Vocabulary; its ``"labels"`` namespace size sets the number
                of output classes.
            embedder: Turns the indexed text field into token embeddings.
            encoder: Reduces the token embeddings to a single vector; its
                ``get_output_dim()`` sets the input width of ``layer1``.
        """
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")

        # NOTE(review): this dropout module is created but never applied in
        # forward(); it is kept so the module's attributes stay unchanged.
        # Confirm whether it was meant to be used between the hidden layers.
        self.dropout = torch.nn.Dropout(p=0.2)
        self.relu = torch.nn.ReLU()

        # Attribute names are kept as-is so existing checkpoints still load.
        self.layer1 = torch.nn.Linear(encoder.get_output_dim(), 512)
        self.layer2 = torch.nn.Linear(512, 128)
        self.layer3 = torch.nn.Linear(128, 50)
        self.layer4 = torch.nn.Linear(50, 10)
        self.classifier = torch.nn.Linear(10, num_labels)

        self.accuracy = CategoricalAccuracy()

    def forward(
        self, text: TextFieldTensors, label: torch.Tensor = None
    ) -> Dict[str, torch.Tensor]:
        """Compute class probabilities and, when labels are given, the loss.

        Args:
            text: Indexed text field tensors for a batch.
            label: Optional gold label indices for the batch. When provided,
                the accuracy metric is updated and ``"loss"`` is added to the
                output.

        Returns:
            A dict with ``"probs"`` (softmax over the label dimension) and,
            if ``label`` is not ``None``, ``"loss"`` (cross-entropy computed
            from the raw logits).
        """
        # Shape: (batch_size, num_tokens, embedding_dim)
        embedded_text = self.embedder(text)
        # Shape: (batch_size, num_tokens)
        mask = util.get_text_field_mask(text)
        # Shape: (batch_size, encoding_dim)
        encoded_text = self.encoder(embedded_text, mask)

        # MLP head: (batch_size, encoding_dim) -> (batch_size, 10)
        x = self.relu(self.layer1(encoded_text))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))

        # Shape: (batch_size, num_labels)
        logits = self.classifier(x)
        # Shape: (batch_size, num_labels)
        # Pass dim explicitly: the implicit-dimension form of softmax is
        # deprecated in PyTorch and emits a warning on every call.
        probs = torch.nn.functional.softmax(logits, dim=-1)

        output = {"probs": probs}
        if label is not None:
            self.accuracy(logits, label)
            # cross_entropy takes raw logits (not probs); the result is a
            # scalar loss averaged over the batch.
            output["loss"] = torch.nn.functional.cross_entropy(logits, label)
        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        """Return the accumulated accuracy, optionally resetting the metric.

        Args:
            reset: Forwarded to the accuracy metric's ``get_metric``.

        Returns:
            A dict with the single key ``"accuracy"``.
        """
        return {"accuracy": self.accuracy.get_metric(reset)}
但我不知道如何更改配置文件。如何更改官方教程中的以下配置文件以使用ELMo?
my_text_classifier.jsonnet
// Training configuration for the "simple_classifier" model.
// Top-level sections: dataset reader, data paths, model, data loader, trainer.
{
// How the TSV data files are read and how tokens are indexed.
"dataset_reader" : {
"type": "classification-tsv",
"token_indexers": {
// The key "tokens" is the same key used under
// model.embedder.token_embedders below.
// NOTE(review): switching to ELMo presumably requires changing both this
// indexer and the matching token embedder together -- confirm against the
// AllenNLP documentation.
"tokens": {
"type": "single_id"
}
}
},
// Training and validation data files.
"train_data_path": "data/movie_review/train.tsv",
"validation_data_path": "data/movie_review/dev.tsv",
// Model section: "type" selects the registered model name; "embedder" and
// "encoder" correspond to the model's constructor arguments of the same names.
"model": {
"type": "simple_classifier",
"embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 10
}
}
},
"encoder": {
"type": "bag_of_embeddings",
// Same value as the embedder's embedding_dim above (both are 10).
"embedding_dim": 10
}
},
// Batching: 8 instances per batch, with shuffling enabled.
"data_loader": {
"batch_size": 8,
"shuffle": true
},
// Optimization: Adam optimizer for 5 epochs.
"trainer": {
"optimizer": "adam",
"num_epochs": 5
}
}
如果有人可以帮助我,我会很高兴。