0

当我把 freq_representation 传给 .predict 函数时,出现如下错误:ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)。该如何解决?(出错的调用位于代码的最后部分。)我附上了完整的代码,方便大家理解问题。

# Load the tokenizer and the encoder from the same pretrained checkpoint.
# NOTE(review): this line used to be a bare `from_pretrained(...)` call with
# no receiver and no assignment, although `dbert_tokenizer` is used further
# down. `DistilBertTokenizer` is assumed to be the intended class -- confirm
# against the file's imports.
dbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
dbert_model = TFDistilBertModel.from_pretrained('distilbert-base-uncased')
# Sequence length of the token-id and attention-mask inputs built in
# create_model().
max_len = 50
# Function for model creation

def create_model():
    """Build the two-branch Keras model used for prediction.

    Branch 1 runs token ids and an attention mask through the module-level
    ``dbert_model`` and keeps the vector at sequence position 0.
    Branch 2 passes a token-frequency vector through a small L2-regularised
    dense layer with dropout.  The two branches are concatenated, passed
    through a 512-unit dense layer with dropout, and reduced to a single
    ReLU-activated output unit.

    Reads the module-level names ``max_len``, ``dbert_model`` and
    ``dbert_tokenizer``.  The width of the frequency input is computed from
    ``dbert_tokenizer.vocab`` at call time, so call this only after the
    vocabulary has reached its final size.

    Returns:
        A ``tf.keras.Model`` taking ``[token_ids, attention_mask,
        frequencies]`` and producing one value per sample.
    """
    inps = Input(shape=(max_len,), dtype='int64')
    masks = Input(shape=(max_len,), dtype='int64')
    # [0] selects the first element of the encoder output; [:, 0, :] then
    # keeps only sequence position 0 for every sample in the batch.
    dbert_layer = dbert_model(inps, attention_mask=masks)[0][:, 0, :]

    # One slot per vocabulary entry except the first one (the PAD token,
    # which is not used in the token frequencies).
    freq_width = len(dbert_tokenizer.vocab) - 1
    # `shape` must be a tuple: `(n)` is just the int `n`, `(n,)` is 1-D.
    freq_layer = Input(shape=(freq_width,), dtype='float64')
    dense0 = Dense(50, activation='relu',
                   kernel_regularizer=regularizers.l2(0.01))(freq_layer)
    dropout0 = Dropout(0.5)(dense0)

    concatted = Concatenate()([dbert_layer, dropout0])
    dense = Dense(512, activation='relu',
                  kernel_regularizer=regularizers.l2(0.01))(concatted)
    dropout = Dropout(0.5)(dense)
    pred = Dense(1, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01))(dropout)

    # The input order here is the order callers must use for fit/predict.
    model = tf.keras.Model(inputs=[inps, masks, freq_layer], outputs=pred)
    return model

# Tokenizer vocabulary update (add htg, mtn, url and rtw tokens)

for extra_token in ("htg", "mtn", "url", "rtw"):
    # len() is re-evaluated on every insertion, so each new token receives
    # the next free id.  Tokens that are already present are skipped:
    # re-assigning an existing key would not grow the mapping, so that token
    # would be moved to id == len(vocab) and the next new token would then be
    # handed the very same id.
    # NOTE(review): assumes `vocab` is a mutable dict-like mapping -- confirm.
    if extra_token not in dbert_tokenizer.vocab:
        dbert_tokenizer.vocab[extra_token] = len(dbert_tokenizer.vocab)

# Resize the encoder's token embeddings to the tokenizer's current length.
dbert_model.resize_token_embeddings(len(dbert_tokenizer))

model = create_model()

# Compile with Adam, using mean squared error both as the loss and as the
# reported metric.  Every Keras object is taken from `tf.keras` so the whole
# block relies on a single namespace.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, name='Adam'),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanSquaredError(name="mean_squared_error")],
)

# Prediction part

# Encode one text to exactly 50 token ids (padded or truncated) plus the
# matching attention mask.  `padding='max_length'` replaces the deprecated
# `pad_to_max_length=True`.
bert_inps = dbert_tokenizer.encode_plus(
    text,
    add_special_tokens=True,
    max_length=50,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
)

batch_size = 1
# Placeholder: freq_representation is a numpy array of size 30,525 holding
# the frequency of each token of dbert_tokenizer.vocab (same token order) in
# the desired time window, without the first token (the PAD token).
freq_representation = np.array(...)

# Without `return_tensors`, encode_plus yields plain Python lists with no
# batch axis, and freq_representation is 1-D.  Handing that mix straight to
# predict() raises "Failed to convert a NumPy array to a Tensor (Unsupported
# object type list)".  Convert every input to a NumPy array with a leading
# batch axis of size 1 instead.
input_ids = np.asarray([bert_inps['input_ids']], dtype='int64')
attention_mask = np.asarray([bert_inps['attention_mask']], dtype='int64')
freq_batch = np.asarray(freq_representation, dtype='float64').reshape(1, -1)

preds = model.predict([input_ids, attention_mask, freq_batch],
                      batch_size=batch_size)
4

0 回答 0