This is my attention layer:
from keras import backend as K
from keras import initializers
from keras.engine.topology import Layer

class Attention(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = 50
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        # W maps the feature axis to attention_dim so that the bias_add with b
        # (shape (attention_dim,)) and the dot with u (shape (attention_dim, 1))
        # in call() are shape-compatible
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim,)))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(Attention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # x: (batch, timesteps, features)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))  # (batch, timesteps, attention_dim)
        ait = K.dot(uit, self.u)                            # (batch, timesteps, 1)
        ait = K.squeeze(ait, -1)                            # (batch, timesteps)
        ait = K.exp(ait)
        if mask is not None:
            ait *= K.cast(mask, K.floatx())
        # normalize to attention weights (masked softmax over timesteps)
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)  # attention-weighted sum over timesteps
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
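For reference, here is a minimal NumPy sketch of what this layer is meant to compute (my own illustration, not part of the model): for input of shape (batch, timesteps, features) it returns an attention-weighted sum over timesteps of shape (batch, features).

    # NumPy sketch of the attention math above (illustration only)
    import numpy as np

    batch, time, feat, attention_dim = 2, 6, 100, 50
    rng = np.random.default_rng(0)
    x = rng.normal(size=(batch, time, feat))
    W = rng.normal(size=(feat, attention_dim))
    b = rng.normal(size=(attention_dim,))
    u = rng.normal(size=(attention_dim, 1))

    uit = np.tanh(x @ W + b)                   # (batch, time, attention_dim)
    ait = np.exp(np.squeeze(uit @ u, -1))      # (batch, time)
    ait /= ait.sum(axis=1, keepdims=True)      # softmax over timesteps
    output = (x * ait[..., None]).sum(axis=1)  # (batch, feat)
    print(output.shape)                        # (2, 100)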
I am trying to combine a CNN with an attention network for text classification. Here is my code in Keras:
def inputs_and_embeddings(features, config):
    inputs, embeddings = [], []
    for f in features:
        E = Embedding if not config.fixed_embedding else FixedEmbedding
        i = Input(shape=(config.doc_size,), dtype='int32', name=f.name)
        e = E(f.input_dim, f.output_dim, weights=[f.weights],
              input_length=config.doc_size)(i)
        inputs.append(i)
        embeddings.append(e)
    return inputs, embeddings

inputs, embeddings = inputs_and_embeddings(features, config)

# calculate the size of documents and all features
seq = concat(embeddings)
cshape = (config.doc_size, sum(f.output_dim for f in features))
seq = Reshape((1,) + cshape)(seq)
# seq = Reshape((1, config.doc_size, w2v.output_dim))(embeddings)  # old way of doing the above

# convolution(s)
convLayers = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    seq2 = Convolution2D(
        filter_num,
        filter_size,
        cshape[1],
        border_mode='valid',
        activation='relu',
        dim_ordering='th'
    )(seq)
    seq2 = MaxPooling2D(
        pool_size=(config.doc_size - filter_size + 1, 1),
        dim_ordering='th'
    )(seq2)
    # seq2 = Flatten()(seq2)
    convLayers.append(seq2)
seq = Concatenate(axis=1)(convLayers)

if config.drop_prob:
    seq = Dropout(config.drop_prob)(seq)
for s in config.hidden_sizes:
    seq = Dense(s, activation='relu')(seq)

# reshape back to a sequence so the GRU below gets 3D input
seq = Reshape((200, 3))(seq)
word_encoder = Bidirectional(GRU(50, return_sequences=True))(seq)
rnn_type = 'GRU'
dense_transform_word = Dense(
    100,
    activation='relu', kernel_regularizer=l2_reg,
    name='dense_transform_word')(word_encoder)

# word attention
attention_weighted_sentence = Model(
    inputs, Attention(name="word_attention")(dense_transform_word))
word_attention_model = attention_weighted_sentence
attention_weighted_sentence.summary()

# sentence-attention-weighted document scores
texts_in = Input(shape=(MAX_SEQ_LEN, config.doc_size), dtype='int32', name="input_2")
attention_weighted_sentences = TimeDistributed(attention_weighted_sentence)(texts_in)
if rnn_type == 'GRU':
    # sentence_encoder = Bidirectional(GRU(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)
    dropout = Dropout(0.1)(attention_weighted_sentences)
    sentence_encoder = Bidirectional(GRU(50, return_sequences=True))(dropout)
else:
    sentence_encoder = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)

dense_transform_sentence = Dense(
    100,
    activation='relu',
    name='dense_transform_sentence',
    kernel_regularizer=l2_reg)(sentence_encoder)

# sentence attention
attention_weighted_text = Attention(name="sentence_attention")(dense_transform_sentence)
prediction = Dense(19, activation='sigmoid')(attention_weighted_text)

model = Model(inputs, prediction)  # <-- raises "Graph disconnected" here
model.summary()
When I initialize the model with inputs and prediction (the last Model(...) call, marked above), I get a "Graph disconnected" error. From what I have read, this error occurs when there is no path between a model's inputs and its outputs, but I cannot work out what the inputs of my model should actually be. Can anyone help me fix this?
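If it helps, here is a minimal sketch of how I understand this error can arise (my own reduced example, not taken from the model above): when an inner Model is called on a fresh Input tensor, the outer graph starts at that new Input, so building a Model from the inner model's original input tensor fails.

    from keras.layers import Input, Dense
    from keras.models import Model

    a = Input(shape=(10,))
    b = Input(shape=(10,))
    inner = Model(a, Dense(5)(a))  # inner model owns input tensor `a`
    out = inner(b)                 # reusing it on `b` starts a new graph at `b`

    # Model(a, out)  # raises "Graph disconnected": `out` does not depend on `a`
    Model(b, out)    # works: `b` is the tensor that actually feeds `out`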