
Here is my attention layer:

# Assumed imports for this layer (Keras 2.x backend API):
from keras import backend as K
from keras import initializers
from keras.layers import Layer

class Attention(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = 50
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # Expects a 3D input: (batch, timesteps, features).
        assert len(input_shape) == 3
        # W must map the feature axis to attention_dim so that bias_add with b is valid.
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(Attention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # Score each timestep: uit = tanh(x·W + b), then ait = exp(uit·u).
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)

        if mask is not None:
            # Zero out masked timesteps before normalizing.
            ait *= K.cast(mask, K.floatx())

        # Normalize the scores and return the attention-weighted sum over timesteps.
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
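
For reference, here is a minimal usage sketch of this layer (the input shape and wrapping model are illustrative, not from my pipeline): it consumes a 3D tensor of shape (batch, timesteps, features) and returns the attention-weighted sum over timesteps, a 2D tensor of shape (batch, features), as compute_output_shape indicates.

# Minimal usage sketch; the shapes here are hypothetical.
from keras.layers import Input, GRU, Bidirectional
from keras.models import Model

x_in = Input(shape=(30, 200))                             # (batch, 30 timesteps, 200 features)
h = Bidirectional(GRU(50, return_sequences=True))(x_in)   # (batch, 30, 100)
summary_vec = Attention()(h)                              # (batch, 100)
toy_model = Model(x_in, summary_vec)
toy_model.summary()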

I am trying to combine a CNN with an attention network for text classification. Here is my code in Keras:

def inputs_and_embeddings(features, config):
    inputs, embeddings = [], []
    for f in features:
        E = Embedding if not config.fixed_embedding else FixedEmbedding
        i = Input(shape=(config.doc_size,), dtype='int32', name=f.name)
        e = E(f.input_dim, f.output_dim, weights=[f.weights],
              input_length=config.doc_size)(i)
        inputs.append(i)
        embeddings.append(e)
    # Return after the loop so every feature gets an Input and an Embedding.
    return inputs, embeddings

inputs, embeddings = inputs_and_embeddings(features, config)

# calculating the size of documents and all features.
seq = concat(embeddings)
cshape = (config.doc_size, sum(f.output_dim for f in features))
seq = Reshape((1,) + cshape)(seq)

#seq = Reshape((1, config.doc_size, w2v.output_dim))(embeddings) #old way of doing the above

# seq = Bidirectional()
# Convolution(s)
convLayers = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    seq2 = Convolution2D(
        filter_num,
        filter_size,
        cshape[1],
        border_mode='valid',
        activation='relu',
        dim_ordering='th'
    )(seq)
    seq2 = MaxPooling2D(
        pool_size=(config.doc_size - filter_size + 1, 1),
        dim_ordering='th'
    )(seq2)
    # seq2 = Flatten()(seq2)
    convLayers.append(seq2)

seq = Concatenate(axis=1)(convLayers)
if config.drop_prob:
    seq = Dropout(config.drop_prob)(seq)
for s in config.hidden_sizes:
    seq = Dense(s, activation='relu')(seq)

# need reshaping here
seq = Reshape((200, 3))(seq)
word_encoder = Bidirectional(GRU(50, return_sequences=True))(seq)
rnn_type = 'GRU'

dense_transform_word = Dense(
    100,
    activation='relu', kernel_regularizer=l2_reg,
    name='dense_transform_word')(word_encoder)

# word attention
attention_weighted_sentence = Model(
    inputs, Attention(name="word_attention")(dense_transform_word))

word_attention_model = attention_weighted_sentence

attention_weighted_sentence.summary()

# sentence-attention-weighted document scores
texts_in = Input(shape=(MAX_SEQ_LEN, config.doc_size), dtype='int32', name="input_2")

attention_weighted_sentences = TimeDistributed(attention_weighted_sentence)(texts_in)

if rnn_type == 'GRU':
    #sentence_encoder = Bidirectional(GRU(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)
    dropout = Dropout(0.1)(attention_weighted_sentences)
    sentence_encoder = Bidirectional(GRU(50, return_sequences=True))(dropout)
else:
    sentence_encoder = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)

dense_transform_sentence = Dense(
    100,
    activation='relu',
    name='dense_transform_sentence',
    kernel_regularizer=l2_reg)(sentence_encoder)

# sentence attention
attention_weighted_text = Attention(name="sentence_attention")(dense_transform_sentence)

prediction = Dense(19, activation='sigmoid')(attention_weighted_text)

# This is the line that raises the Graph disconnected error:
model = Model(inputs, prediction)
model.summary()

When I initialize the model with the inputs and the prediction, I get a Graph disconnected error, as shown in the code. From my reading, this error occurs when there is no connection between the inputs and the outputs of the model. However, I cannot figure out what the inputs to my model should be. Can anyone help me resolve this?
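
For context, here is a minimal sketch of how this error arises (the names and sizes are made up): Model raises Graph disconnected whenever an output tensor traces back to an Input that was not passed to the Model constructor.

# Minimal illustration of the error; names and sizes are hypothetical.
from keras.layers import Input, Dense
from keras.models import Model

a = Input(shape=(10,), name='a')
b = Input(shape=(10,), name='b')
y = Dense(1)(b)   # y is built only from b

# Model(a, y) raises "Graph disconnected": y cannot be reached from a.
# Model([a, b], y) builds, because every Input feeding y is declared.
works = Model([a, b], y)
works.summary()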


1 Answer

The change below keeps the word-attention output in a variable (outputs), builds the word-level sub-model from it, and passes texts_in to the final Model together with inputs, because prediction is computed from texts_in:
def inputs_and_embeddings(features, config):
    inputs, embeddings = [], []
    for f in features:
        E = Embedding if not config.fixed_embedding else FixedEmbedding
        i = Input(shape=(config.doc_size,), dtype='int32', name=f.name)
        e = E(f.input_dim,
              f.output_dim,
              weights=[f.weights],
              input_length=config.doc_size)(i)
        inputs.append(i)
        embeddings.append(e)
    # Return after the loop so every feature gets an Input and an Embedding.
    return inputs, embeddings

inputs, embeddings = inputs_and_embeddings(features, config)
#calculating the size of documents and all features.
seq = concat(embeddings)
cshape = (config.doc_size, sum(f.output_dim for f in features)) 
seq = Reshape((1,)+cshape)(seq)

#seq = Reshape((1, config.doc_size, w2v.output_dim))(embeddings) #old way of doing the above

# seq = Bidirectional()
# Convolution(s)
convLayers = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    seq2 = Convolution2D(
        filter_num,
        filter_size,
        cshape[1],
        border_mode='valid',
        activation='relu',
        dim_ordering='th'
    )(seq)
    seq2 = MaxPooling2D(
        pool_size=(config.doc_size-filter_size+1, 1),
        dim_ordering='th'
    )(seq2)
    # seq2 = Flatten()(seq2)
    convLayers.append(seq2)


seq = Concatenate(axis=1)(convLayers)
if config.drop_prob:
    seq = Dropout(config.drop_prob)(seq)
for s in config.hidden_sizes:
    seq = Dense(s, activation='relu')(seq)

#need reshaping here
seq = Reshape((200,3))(seq)
word_encoder = Bidirectional(GRU(50, return_sequences=True))(seq) 
rnn_type = 'GRU'  

dense_transform_word = Dense(
        100, 
        activation='relu', kernel_regularizer=l2_reg,
        name='dense_transform_word')(word_encoder)


outputs = Attention(name="word_attention")(dense_transform_word)
    # word attention
attention_weighted_sentence = Model(
        inputs, outputs)

word_attention_model = attention_weighted_sentence

attention_weighted_sentence.summary()

    # sentence-attention-weighted document scores

texts_in = Input(shape=(MAX_SEQ_LEN,config.doc_size), dtype='int32', name="input_2")

# TimeDistributed must wrap a layer or model, not a tensor:
attention_weighted_sentences = TimeDistributed(attention_weighted_sentence)(texts_in)



if rnn_type == 'GRU':
        #sentence_encoder = Bidirectional(GRU(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)
        dropout = Dropout(0.1)(attention_weighted_sentences)
        sentence_encoder = Bidirectional(GRU(50, return_sequences=True))(dropout)
else:
        sentence_encoder = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)


dense_transform_sentence = Dense(
        100, 
        activation='relu', 
        name='dense_transform_sentence',
        kernel_regularizer=l2_reg)(sentence_encoder)

    # sentence attention
attention_weighted_text = Attention(name="sentence_attention")(dense_transform_sentence)


prediction = Dense(19, activation='sigmoid')(attention_weighted_text)

# inputs is already a list, so concatenate rather than nest:
model = Model(inputs + [texts_in], prediction)
model.summary()
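
Assuming the model now builds, a possible way to feed it (the array names, batch size, loss, and optimizer below are assumptions, not part of the answer) is one integer array per word-level feature input plus one array for texts_in, in the same order as inputs + [texts_in]:

# Hypothetical training call matching the input list inputs + [texts_in] above.
import numpy as np

batch = 8
word_feature_arrays = [np.zeros((batch, config.doc_size), dtype='int32') for _ in features]
texts_array = np.zeros((batch, MAX_SEQ_LEN, config.doc_size), dtype='int32')
labels = np.zeros((batch, 19))

model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(word_feature_arrays + [texts_array], labels, epochs=1)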
answered 2019-07-22T13:02:42.097