
Hi everyone. I have developed two RNN models for a chatbot. Suppose the user says: "Tell me what the weather will be like in Paris tomorrow." The first model recognizes the user's intent, WEATHER_INFO, while the second extracts the meaningful pieces of the phrase as slots, such as LOC:Paris and DATE:tomorrow. Of course there are many other intent classes as well, e.g. MUSIC_PLAY and so on.

Since the two models are not linked in any way while processing the same phrase, we can get inconsistent results such as intent WEATHER_INFO together with slot TITLE:Paris, where the TITLE slot belongs to the MUSIC_PLAY intent. Many researchers have tried to improve performance by creating joint models in which each task informs the other, precisely to avoid this kind of error. Below is a small sketch of the two target formats, and after that my slot-filling code.
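The sketch below is my own illustration: the label names come from the example above, and the BIO tagging scheme for the slot tags is an assumption, not part of my actual data.

# Hypothetical training example, for illustration only.
# Intent detection predicts one label for the whole utterance;
# slot filling predicts one tag per token (BIO scheme assumed).
utterance = ["tell", "me", "the", "weather", "in", "paris", "tomorrow"]
intent = "WEATHER_INFO"
slots = ["O", "O", "O", "O", "O", "B-LOC", "B-DATE"]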

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
import keras as k
from keras_contrib.layers import CRF


# input = Input(shape=(140,))
# input = Input(shape=(len(X_train),max_len))
input = Input(shape=(max_len,))

word_embedding_size = 150
n_words = len(token_ids)


# Embedding Layer
model = Embedding(input_dim=n_words, output_dim=word_embedding_size, input_length=max_len)(input)
# model = Embedding(input_dim=n_words, output_dim=word_embedding_size, input_length=140)(input)

# BI-LSTM Layer
model = Bidirectional(LSTM(units=word_embedding_size,
                           return_sequences=True,
                           dropout=0.5,
                           recurrent_dropout=0.5,
                           kernel_initializer=k.initializers.he_normal()))(model)
model = LSTM(units=word_embedding_size * 2,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5,
             kernel_initializer=k.initializers.he_normal())(model)

# TimeDistributed Layer
model = TimeDistributed(Dense(n_tags, activation="relu"))(model)

# CRF Layer
crf = CRF(n_tags)

out = crf(model)  # output
model = Model(input, out)


# model compile and fit

from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

# Optimiser
adam = k.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999)

# Compile model
model.compile(optimizer=adam, loss=crf.loss_function, metrics=[crf.accuracy, 'accuracy'])

model.summary()

# Saving the best model only
filepath = "ner-bi-lstm-td-model-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Fit the best model
history = model.fit(X_train, y_train, batch_size=256, epochs=10, validation_split=0.1, verbose=1,
                    callbacks=callbacks_list)
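Once the model is trained, the per-token predictions still have to be mapped from tag indices back to tag names. A minimal decoding sketch (idx2tag is a hypothetical dict that inverts whatever tag-to-index mapping was used to build y_train):

import numpy as np

pred = model.predict(X_train[:1])            # shape: (1, max_len, n_tags)
tag_indices = np.argmax(pred, axis=-1)[0]    # most likely tag index per token
tags = [idx2tag[int(i)] for i in tag_indices]  # e.g. ['O', 'O', 'B-LOC', ...]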

Here is the code for the intent-detection NN:

#CNN architecture    

from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras import layers

batch_size = 128

epochs = 12
if nn_architecture == 'CNN':
    model_CNN = Sequential()
    e = Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=max_length, trainable=False)
    model_CNN.add(e)
    model_CNN.add(Dropout(0.2))
    # we add a Conv1D layer, which will learn word-group
    # filters of width kernel_size:
    filters = 50
    kernel_size = 3    
    hidden_dims = 250
    model_CNN.add(layers.Conv1D(filters,
                     kernel_size,
                     padding='valid',
                     activation='relu',
                     strides=1))
    # we use max pooling:
    model_CNN.add(layers.GlobalMaxPooling1D())

    # We add a vanilla hidden layer:
    model_CNN.add(Dense(hidden_dims))
    model_CNN.add(Dropout(0.2))
    model_CNN.add(layers.Activation('relu'))

    # We project onto an output layer with one unit per intent class, squashed with a sigmoid:
    model_CNN.add(Dense(nbClasses)) # no_of_categories
    model_CNN.add(layers.Activation('sigmoid'))

    model_CNN.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    history_CNN = model_CNN.fit(X_train, Y_train_c,
              batch_size=batch_size,
              epochs=epochs,
              #validation_split=0.2
              )
    # Epoch 12/12
    # 38771/38771 [==============================] - 11s 276us/step - 
    #loss: 0.0046 - accuracy: 0.9985
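For completeness, here is a minimal inference sketch for this classifier (assuming inputs preprocessed the same way as X_train, and a hypothetical idx2intent dict mapping class indices back to intent names):

import numpy as np

probs = model_CNN.predict(X_train[:1])        # shape: (1, nbClasses)
intent = idx2intent[int(np.argmax(probs))]    # e.g. 'WEATHER_INFO'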

What I want is to merge these two architectures to obtain the joint model shown in the image I attached.

Please help me... thanks in advance.


1 Answer


Nowadays this is hardly a research problem any more, but... here is an article on github.io that does exactly what you want: it combines the intent-classification and slot-filling tasks in a single model. https://chsasank.github.io/spoken-language-understanding.html

Since link-only answers are discouraged, here is the model architecture as well. I modified it slightly, but broadly this is the Keras code:

def build_model(self):
    main_input = Input(shape=(15,), dtype='int32', name='main_input')

    # Shared encoder: embeddings plus a 1D convolution over the token sequence
    x = Embedding(output_dim=self.embedding_dimension, input_dim=n_vocab, input_length=15)(main_input)
    x = Convolution1D(64, 5, padding='same', activation='relu')(x)

    if self.dropout_parameter > 0.0:
        x = Dropout(self.dropout_parameter)(x)

    # Slot-filling branch: an RNN that emits one output per timestep
    if self.rnn_type == 'GRU':
        rnn = GRU(self.rnn_units, return_sequences=True)
    elif self.rnn_type == 'LSTM':
        rnn = LSTM(self.rnn_units, return_sequences=True)
    else:
        rnn = SimpleRNN(self.rnn_units, return_sequences=True)

    if self.bidirectional:
        rnn_slot = Bidirectional(rnn)(x)
    else:
        rnn_slot = rnn(x)

    # Intent branch: an RNN that collapses the whole sequence into one vector
    rnn_intent = GRU(self.rnn_units, return_sequences=False)(x)

    if self.maxPooling:
        x = MaxPooling1D(strides=1, padding='same')(x)
        print("Using MaxPooling")
    elif self.averagePooling:
        x = AveragePooling1D(strides=1, padding='same')(x)
        print("Using AveragePooling")

    # Two heads on the shared encoder: per-token slot tags and a single intent label
    slot_output = Dense(n_slots, activation='softmax', name='slot_output')(rnn_slot)
    intent_output = Dense(n_classes, activation='softmax', name='intent_output')(rnn_intent)
    model = kerasModel(inputs=[main_input], outputs=[intent_output, slot_output])

    # rmsprop is recommended for RNNs https://stats.stackexchange.com/questions/315743/rmsprop-and-adam-vs-sgd
    model.compile(optimizer='rmsprop',
                  loss={'intent_output': 'categorical_crossentropy',
                        'slot_output': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    model.summary()
    self.model = model

    return 0
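Because the model has two named outputs, it is trained with one target array per output head. A usage sketch (y_intent and y_slots are placeholder names: one-hot intent labels of shape (n_samples, n_classes) and one-hot slot tags of shape (n_samples, 15, n_slots)):

# Train both heads jointly; Keras sums the two losses.
model.fit(X_train,
          {'intent_output': y_intent, 'slot_output': y_slots},
          batch_size=128, epochs=10, validation_split=0.1)

# Inference returns one prediction per output head.
intent_probs, slot_probs = model.predict(X_test)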
answered Apr 29, 2021 at 13:54