Can't fit model with transformers
import pandas as pd
import numpy as np
from transformers import BertTokenizer
from transformers import TFAutoModel
import tensorflow as tf
df = pd.read_csv('train.tsv', sep='\t')
seq_len = 512
num_samples = len(df)
num_samples, seq_len
# initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
# tokenize - this time returning NumPy arrays
tokens = tokenizer(df['Phrase'].tolist(), max_length=seq_len, truncation=True,
                   padding='max_length', add_special_tokens=True,
                   return_tensors='np')
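Note: with return_tensors='np' the tokenizer returns plain integer NumPy arrays, which matters for the element_spec further down. A quick sanity check (a sketch, not in the original code):
# token IDs and masks come back as integers, not floats; whether they are
# int32 or int64 depends on the platform
print(tokens['input_ids'].dtype, tokens['input_ids'].shape)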
with open('movie-xids.npy', 'wb') as f:
    np.save(f, tokens['input_ids'])
with open('movie-xmask.npy', 'wb') as f:
    np.save(f, tokens['attention_mask'])
arr = df['Sentiment'].values
# one-hot encode the sentiment labels
labels = np.zeros((num_samples, arr.max()+1))
labels[np.arange(num_samples), arr] = 1
with open('movie-labels.npy', 'wb') as f:
    np.save(f, labels)
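The step that builds and saves the 'train' and 'val' datasets is not shown; presumably it looked roughly like the sketch below (map_func, the 90/10 split, and the batch size of 16 are assumptions inferred from the element_spec used later):
# sketch of the (not shown) dataset build/save step -- names and split are assumptions
with open('movie-xids.npy', 'rb') as f:
    Xids = np.load(f)
with open('movie-xmask.npy', 'rb') as f:
    Xmask = np.load(f)
with open('movie-labels.npy', 'rb') as f:
    labels = np.load(f)
dataset = tf.data.Dataset.from_tensor_slices((Xids, Xmask, labels))
def map_func(input_ids, masks, labels):
    # pack the two inputs into the dict expected by the named Input layers
    return {'input_ids': input_ids, 'attention_mask': masks}, labels
dataset = dataset.map(map_func).shuffle(100000).batch(16, drop_remainder=True)
print(dataset.element_spec)  # these dtypes are what load() must be given later
split = 0.9
size = int((Xids.shape[0] / 16) * split)
tf.data.experimental.save(dataset.take(size), 'train')
tf.data.experimental.save(dataset.skip(size), 'val')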
bert = TFAutoModel.from_pretrained('bert-base-cased')
# two input layers; their names must match the dictionary keys in the TF dataset
input_ids = tf.keras.layers.Input(shape=(512,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(512,), name='attention_mask', dtype='int32')
# we access the core transformer model inside our bert object via its bert attribute (i.e. bert.bert rather than bert)
embeddings = bert.bert(input_ids, attention_mask=mask)[1]  # [1] is the pooled output (final activations, already pooled)
# convert bert embeddings into 5 output classes
x = tf.keras.layers.Dense(1024, activation='relu')(embeddings)
y = tf.keras.layers.Dense(5, activation='softmax', name='outputs')(x)
# initialize model
model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)
# freeze the bert layer (model.layers[2]; layers 0 and 1 are the two Input layers)
model.layers[2].trainable = False
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[acc])
element_spec = ({'input_ids': tf.TensorSpec(shape=(16, 512), dtype=tf.float64, name=None),
                 'attention_mask': tf.TensorSpec(shape=(16, 512), dtype=tf.float64, name=None)},
                tf.TensorSpec(shape=(16, 5), dtype=tf.float64, name=None))
# load the training and validation sets
train_ds = tf.data.experimental.load('train', element_spec=element_spec)
val_ds = tf.data.experimental.load('val', element_spec=element_spec)
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3
)
Result:

InvalidArgumentError: Data type mismatch at component 0: expected double but got int32.
	 [[node IteratorGetNext (defined at \KA6C9~1.ABE\AppData\Local\Temp/ipykernel_4340/3491990169.py:67) ]] [Op:__inference_train_function_17352]
Function call stack: train_function
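The mismatch is between the element_spec handed to tf.data.experimental.load (all tf.float64) and the dtypes the dataset was actually saved with: token IDs and masks are integer tensors, hence the int32 at component 0 in the traceback. A sketch of a spec that would match, assuming int32 on this platform:
# element_spec must match the saved dataset exactly; IDs/masks are integer tensors
element_spec = ({'input_ids': tf.TensorSpec(shape=(16, 512), dtype=tf.int32, name=None),
                 'attention_mask': tf.TensorSpec(shape=(16, 512), dtype=tf.int32, name=None)},
                tf.TensorSpec(shape=(16, 5), dtype=tf.float64, name=None))
# safer still: print dataset.element_spec right after building the dataset and
# reuse that output verbatim when calling tf.data.experimental.load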