我正在尝试在 Keras 中实现这个https://arxiv.org/abs/1603.01354,但是卷积层有问题。我不明白如何设置像 nb_filter ecc 这样的超参数,实际上真正的问题是将最大池输出与嵌入词层输出相匹配。
MAX_CHARACTER_LENGTH 61 MAX_SEQUENCE_LENGTH 124
输入数据
x_train = pad_sequences(dataset.token_indices['train'], maxlen=MAX_SEQUENCE_LENGTH)
x_char_train= np.array(dataset.character_indices['train'])
x_char_train=x_char_train.reshape((x_char_train.shape[0], x_char_train.shape[1]*x_char_train.shape[2]))
y_train = pad_sequences(dataset.label_indices['train'], maxlen=MAX_SEQUENCE_LENGTH)
x_val = pad_sequences(dataset.token_indices['valid'], maxlen=MAX_SEQUENCE_LENGTH)
x_char_val= np.array(dataset.character_indices['valid'])
x_char_val=x_char_val.reshape((x_char_val.shape[0], x_char_val.shape[1]*x_char_val.shape[2]))
y_val = pad_sequences(dataset.label_indices['valid'], maxlen=MAX_SEQUENCE_LENGTH)
y_train=np.expand_dims(y_train,-1)
y_val=np.expand_dims(y_val,-1)
该模型
EMBEDDING_WORD_DIM= 100
EMBEDDING_CHAR_DIM= 30
N_FILTERS=20
embedding_char= True
class Model(object):
def __init__(self, embedding_weights, dictonary_size, MAX_SEQUENCE_LENGTH,MAX_CHARACTER_LENGTH, alfabeth_size, tags):
word_input= Sequential()
word_input.add(Embedding(dictonary_size + 1,
EMBEDDING_WORD_DIM,
weights=[embedding_weights],
input_length=MAX_SEQUENCE_LENGTH,trainable=False))
if embedding_char:
character_input=Sequential()
character_input.add(Embedding(alfabeth_size+1,
EMBEDDING_CHAR_DIM,
input_length=MAX_SEQUENCE_LENGTH*MAX_CHARACTER_LENGTH))
print(character_input.output_shape)
character_input.add(Reshape((character_input.output_shape[2],character_input.output_shape[1])))
print(character_input.output_shape)
character_input.add(Convolution1D(nb_filter=N_FILTERS, filter_length=3, activation='tanh'))
print(character_input.output_shape)
character_input.add(MaxPooling1D(pool_length=N_FILTERS))
print(character_input.output_shape)
character_input.add(Reshape((MAX_SEQUENCE_LENGTH,N_FILTERS)))
self.model=Sequential()
self.model.add(Merge([word_input,character_input], mode='concat'))
else:
self.model=word_input
self.model.add(Dropout(0.5))
self.model.add(Bidirectional(LSTM(MAX_SEQUENCE_LENGTH, return_sequences=True)))
self.model.add(Dropout(0.5))
self.model.add(TimeDistributed(Dense(tags)))
crf=ChainCRF()
self.model.add(crf)
self.model.compile(loss=crf.loss, optimizer='rmsprop', metrics=['accuracy'])
形状:初始(14041,7564),嵌入后(None,7564,30),reshape1后(None,30,7564),conv1d后(None,28,20),maxpool1d后(None,1,20)