I am building a model in which LSTM layers are stacked on top of CNN layers. More specifically, it is a multichannel CNN: on top of each channel I apply LSTM layers and then concatenate the results.
The problem is that when I wrap the CNN in a TimeDistributed layer and pass it the embedding output (shape (None, 300 (seq. len), 300 (emb. dim.))), I get a ValueError about the input shape: expected ndim = 3, but got ndim = 2, with input shape = (None, 300 (emb. dim.)). Yet when I check the embedding output, its shape is correct, i.e. (None, 300, 300).
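To make the failure easier to see, here is a minimal standalone snippet (dummy shapes only, no real data or pre-trained weights) that hits what looks like the same error for me:

from keras.layers import Input, TimeDistributed, Convolution1D

x = Input(shape=(300, 300))                     # same shape as my embedding output: (None, 300, 300)
y = TimeDistributed(Convolution1D(filters=128,
                                  kernel_size=3,
                                  activation="relu"))(x)
# fails with the same ValueError: expected ndim = 3, found ndim = 2, input shape (None, 300)

My full code is below: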
sequence_length = 300
embed_dim = 300
class hyper():
    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob, hidden_dims, batch_size, num_epochs):
        # Model hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self, sequenceLength, max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words
m_hyper = hyper(embedding_dim=embed_dim, filter_sizes=(3,4,5,6,8), num_filters=128, dropout_prob=(0.2,0.5),
                hidden_dims=128, batch_size=64, num_epochs=50)
pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
import keras
from keras.layers import Dense, TimeDistributed, Dropout, Input, LeakyReLU, GlobalMaxPooling1D, Convolution1D, Embedding, SpatialDropout1D, BatchNormalization, Flatten
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.utils.vis_utils import plot_model
from keras import regularizers, layers
from keras.layers import Bidirectional
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
def build_model(pr_hyper, m_hyper):
    model_input = Input(shape=(pr_hyper.sequenceLength,))
    embedding = Embedding(pr_hyper.max_words, m_hyper.embedding_dim, weights=[emb], trainable=False)(model_input)
    print(embedding.shape)  # ---> (None, 300, 300)

    # CONVOLUTIONAL blocks, one per filter size (channel)
    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)
    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = TimeDistributed(Convolution1D(filters=m_hyper.num_filters,
                                             kernel_size=sz,
                                             # padding="same",
                                             activation="relu",
                                             strides=1,
                                             # kernel_regularizer=conv_kern_reg,
                                             # bias_regularizer=conv_bias_reg
                                             ))(embedding)  # ---> error says the incoming shape is (None, 300)
        conv = TimeDistributed(GlobalMaxPooling1D())(conv)
        conv = TimeDistributed(Flatten())(conv)
        lstm = Bidirectional(layers.LSTM(256, activation='relu', return_sequences=True))(conv)
        lstm = BatchNormalization()(lstm)
        lstm = Bidirectional(layers.LSTM(128, return_sequences=True, activation='relu'))(lstm)
        lstm = BatchNormalization()(lstm)
        lstm = Bidirectional(layers.LSTM(128, kernel_regularizer=regularizers.l2(0.01), activation='relu'))(lstm)
        conv_blocks.append(lstm)

    # merge the channels
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

    concat = Dense(m_hyper.hidden_dims, activation="relu")(x)
    concat = BatchNormalization()(concat)
    concat = Dropout(m_hyper.dropout_prob[1])(concat)
    concat = Dense(16)(concat)
    concat = BatchNormalization()(concat)
    concat = LeakyReLU()(concat)

    model_output = Dense(2, activation="softmax")(concat)

    model = Model(model_input, model_output)
    model.compile(loss="binary_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.0005), metrics=["accuracy"])  # categorical_crossentropy
    print(model.summary())
    plot_model(model, show_shapes=True, to_file='multichannelCNN&LSTM.png')
    return model
However, the model works perfectly fine without the TimeDistributed layers (and, obviously, without the LSTM layers stacked on top at that point).
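For reference, the per-channel block in that earlier, working version looked roughly like this (a sketch from memory; no TimeDistributed wrapper and therefore no LSTMs on top):

conv_blocks = []
for sz in m_hyper.filter_sizes:
    conv = Convolution1D(filters=m_hyper.num_filters,
                         kernel_size=sz,
                         activation="relu",
                         strides=1)(embedding)   # (None, 300 - sz + 1, 128)
    conv = GlobalMaxPooling1D()(conv)            # (None, 128)
    conv_blocks.append(conv)
x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

That version builds and trains without any shape errors; only adding TimeDistributed (to make room for the LSTM stack) triggers the ValueError above.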
What is going wrong here? I cannot figure it out.