allennlp - 微调阅读理解模型

Question

系统

操作系统：Ubuntu；Python 版本：3.6.7；AllenNLP 版本：1.0.0

问题：使用以下命令可以正常训练模型，但我不确定这是在已有权重的基础上继续更新（即微调），还是只是从头训练一个新模型？

我正在尝试使用以下命令对新数据（即 SQuAD 格式）的阅读理解模型进行微调：

allennlp train fine_tune_config.json -s fine-tune-test/

和以下配置：

// AllenNLP training configuration: a "bidaf" model fed by the "squad" dataset reader.
// NOTE(review): no key in this file points at a previously trained model archive. The only
// pretrained inputs referenced are the GloVe "pretrained_file" and the ELMo "weight_file";
// every other model weight has no initial value specified here.
{
    "dataset_reader": {
      "type": "squad",
      // The indexer keys ("tokens", "elmo", "token_characters") mirror the keys under
      // model.text_field_embedder.token_embedders below.
      "token_indexers": {
        "tokens": {
          "type": "single_id",
          "lowercase_tokens": true
        },
        "elmo": {
          "type": "elmo_characters"
        },
        "token_characters": {
          "type": "characters",
          "character_tokenizer": {
            "byte_encoding": "utf-8",
            // NOTE(review): 259/260 are presumably sentinel ids above the byte range; they must
            // stay below the character embedding's num_embeddings (262) - confirm.
            "start_tokens": [259],
            "end_tokens": [260]
          },
          // Same value as the single CNN filter width in ngram_filter_sizes ([5]) below.
          "min_padding_length": 5
        }
      }
    },
    "train_data_path": "train-v2.0_trimed.json",
    "validation_data_path": "validation_data.json",
    "model": {
      "type": "bidaf",
      "text_field_embedder": {
          "token_embedders": {
              // Word embeddings loaded from a local GloVe file and kept trainable.
              "tokens": {
                  "type": "embedding",
                  "pretrained_file": "glove.6B.100d.txt",
                  "embedding_dim": 100,
                  "trainable": true
              },
              "elmo":{
                  "type": "elmo_token_embedder",
                // The remote S3 locations are left commented out; the local copies of the
                // options and weight files are used instead.
                //   "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
                "options_file": "options_file.json",
                //   "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
                "weight_file": "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
                  "do_layer_norm": false,
                  "dropout": 0.0
              },
              // Character-level CNN encoder: 16-dim character embeddings, 100 filters of width 5.
              "token_characters": {
                  "type": "character_encoding",
                  "embedding": {
                  "num_embeddings": 262,
                  "embedding_dim": 16
                  },
                  "encoder": {
                  "type": "cnn",
                  "embedding_dim": 16,
                  "num_filters": 100,
                  "ngram_filter_sizes": [5]
                  },
                  "dropout": 0.2
              }
          }
      },
      "num_highway_layers": 2,
      "phrase_layer": {
        "type": "lstm",
        "bidirectional": true,
        // 1224 = 100 (tokens embedding_dim) + 100 (token_characters num_filters) + 1024.
        // NOTE(review): the 1024 is presumably the ELMo output width - confirm against
        // options_file.json.
        "input_size": 1224,
        "hidden_size": 100,
        "num_layers": 1
      },
      "matrix_attention": {
        "type": "linear",
        "combination": "x,y,x*y",
        // NOTE(review): 200 presumably equals 2 x hidden_size (100) of the bidirectional
        // phrase_layer; the 800 and 1400 input sizes below are multiples of it. Re-derive all
        // of them if hidden_size changes.
        "tensor_1_dim": 200,
        "tensor_2_dim": 200
      },
      "modeling_layer": {
        "type": "lstm",
        "bidirectional": true,
        "input_size": 800,
        "hidden_size": 100,
        "num_layers": 2,
        "dropout": 0.2
      },
      "span_end_encoder": {
        "type": "lstm",
        "bidirectional": true,
        "input_size": 1400,
        "hidden_size": 100,
        "num_layers": 1
      },
      "dropout": 0.2
    },
    "data_loader": {
      "batch_sampler": {
        "type": "bucket",
        "batch_size": 40
      }
    },
    "trainer": {
      "num_epochs": 20,
      "grad_norm": 5.0,
      "patience": 10,
      // NOTE(review): the "+" prefix presumably marks "em" as higher-is-better, which agrees
      // with the scheduler's "mode": "max" below - confirm.
      "validation_metric": "+em",
      "learning_rate_scheduler": {
        "type": "reduce_on_plateau",
        "factor": 0.5,
        "mode": "max",
        "patience": 2
      },
      "optimizer": {
        "type": "adam",
        "betas": [0.9, 0.9]
      }
    }
  } 
'''

score 0 · Accepted Answer

This training config will use the pre-trained ELMo weights and start from there. I am not sure about commenting out the "options_file" lines though.

There are other weights in your model, weights which are not part of ELMo. Those will be randomly initialized and then trained.

allennlp - 微调阅读理解模型

1 回答 1

Related

Reference