作为背景,我指的是用于情感分类的层次注意网络。
对于代码:我的完整代码发布在下面,但这只是作者在上面的链接上发布的原始代码的简单修改。我在下面解释我的变化。 对于训练数据:这里 对于词嵌入:这是 Glove 嵌入这里 关键配置:Keras 2.0.9、Scikit-Learn 0.19.1、Theano 0.9.0
上面链接中发布的原始代码采用 3D 形状输入,即(评论、句子、单词)。注意力机制适用于句子,也适用于单词。所以它有两个注意力组件,你可以在网页的第四个代码块中看到。
我想把它改成只需要一个2D 形状输入的。我这样做是通过
- 更改输入形状和输入嵌入矩阵(请参阅下面我的代码中的内联注释)
- 通过删除句子注意组件来更改模型构建部分,仅保留单词注意组件(请参阅下面我的代码中的内联注释)
但是,当调用“model.fit”时,代码会产生错误。我在下面发布了完整的代码和错误。
代码:
import numpy as np
import pandas as pd
import re
from bs4 import BeautifulSoup
import os
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.utils import plot_model
from keras.utils.np_utils import to_categorical
from keras.layers import Dense, Input
from keras.layers import Embedding, GRU, Bidirectional, TimeDistributed
from keras.models import Model
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers
MAX_SENT_LENGTH = 100
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
def clean_str(string):
"""
Tokenization/string cleaning for dataset
Every dataset is lower cased except
"""
string = re.sub(r"\\", "", string)
string = re.sub(r"\'", "", string)
string = re.sub(r"\"", "", string)
return string.strip().lower()
#replace this to your own file path
data_train = pd.read_csv('/home/zz/Work/wop/data/sentiment/labeledTrainData_small.tsv', sep='\t')
print(data_train.shape)
labels = []
texts = []
for idx in range(data_train.review.shape[0]):
text = BeautifulSoup(data_train.review[idx])
text = clean_str(text.get_text().encode('ascii', 'ignore').decode('ascii'))
texts.append(text)
labels.append(data_train.sentiment[idx])
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
##################################
# Change 1. The input shape is now 2D (sentence, words) instead of 3D
##################################
data = np.zeros((len(texts), MAX_SENT_LENGTH), dtype='int32')
for i, content in enumerate(texts):
wordTokens = text_to_word_sequence(content)
k = 0
for _, word in enumerate(wordTokens):
if k < MAX_SENT_LENGTH and tokenizer.word_index[word] < MAX_NB_WORDS:
data[i, k] = tokenizer.word_index[word]
k = k + 1
##################################
word_index = tokenizer.word_index
print('Total %s unique tokens.' % len(word_index))
labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
x_train = data[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]
print('Number of positive and negative reviews in traing and validation set')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))
#replace with your own embedding file path
GLOVE_DIR = "/home/zz/Work/data/glove.6B"
embeddings_index = {}
f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'))
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
f.close()
print('Total %s word vectors.' % len(embeddings_index))
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
# building Hierachical Attention network
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
embedding_layer = Embedding(len(word_index) + 1,
EMBEDDING_DIM,
weights=[embedding_matrix],
input_length=MAX_SENT_LENGTH,
trainable=True,
mask_zero=True)
class AttLayer(Layer):
def __init__(self, attention_dim,**kwargs):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(AttLayer, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim,)))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weights = [self.W, self.b, self.u]
super(AttLayer, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return None
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tile(K.expand_dims(self.W, axis=0), (K.shape(x)[0], 1, 1))
uit = tf.matmul(x, uit)
uit = K.tanh(K.bias_add(uit, self.b))
ait = K.dot(uit, self.u)
ait = K.squeeze(ait, -1)
ait = K.exp(ait)
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting in theano
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
ait = K.expand_dims(ait)
weighted_input = x * ait
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[-1])
#################################################
# Change 2. The model contains only one attention block now
#################################################
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
l_dense = TimeDistributed(Dense(200))(l_lstm)
l_att = AttLayer(100)(l_dense)
############################################
preds = Dense(2, activation='softmax')(l_att)
model = Model(sentence_input, preds)
#### clone the model #### Line X
model_copy = clone_model(model)
plot_model(model, to_file="model.png")
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print("model fitting - Hierachical attention network")
model.fit(x_train, y_train, validation_data=(x_val, y_val),
nb_epoch=10, batch_size=50,verbose=2)
ERROR:代码的最后一行生成以下错误跟踪:
Epoch 1/10
Traceback (most recent call last):
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 884, in __call__
self.fn() if output_subset is None else\
ValueError: Input dimension mis-match. (input[0].shape[1] = 50, input[1].shape[1] = 100)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/zz/Work/wop/code/python/src/3rdparty/han/textClassfierHATT2D.py", line 187, in <module>
nb_epoch=10, batch_size=50,verbose=2)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1631, in fit
validation_steps=validation_steps)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1213, in _fit_loop
outs = f(ins_batch)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/keras/backend/theano_backend.py", line 1223, in __call__
return self.function(*inputs)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 898, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/theano/gof/link.py", line 325, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/six.py", line 692, in reraise
raise value.with_traceback(tb)
File "/home/zz/Programs/anaconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 884, in __call__
self.fn() if output_subset is None else\
ValueError: Input dimension mis-match. (input[0].shape[1] = 50, input[1].shape[1] = 100)
Apply node that caused the error: Elemwise{mul,no_inplace}(InplaceDimShuffle{x,0}.0, Elemwise{Cast{float32}}.0)
Toposort index: 459
Inputs types: [TensorType(float32, row), TensorType(float32, matrix)]
Inputs shapes: [(1, 50), (50, 100)]
Inputs strides: [(200, 4), (400, 4)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[Sum{axis=[1], acc_dtype=float64}(Elemwise{mul,no_inplace}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
我真的很感激对此的一些建议,非常感谢提前!