def split_data(path, *, test_size=0.1, random_state=100):
    """Load a CSV file and split its rows into train and test frames.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        test_size: Forwarded to ``train_test_split``; defaults to the
            original hard-coded value of 0.1.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible; defaults to the original hard-coded value of 100.

    Returns:
        Whatever ``train_test_split`` returns for a single input frame; the
        caller unpacks it as ``train, test``.
    """
    df = pd.read_csv(path)
    return train_test_split(df, test_size=test_size, random_state=random_state)
# Hold out a test frame first, then pull plain Python lists out of each frame.
train, test = split_data(DATA_DIR)

train_texts = train['text'].to_list()
train_labels = train['sentiment'].to_list()

test_texts = test['text'].to_list()
test_labels = test['sentiment'].to_list()

# Carve a validation subset out of the training lists; the same seed as the
# first split keeps the partition reproducible.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.1,
    random_state=100,
)
from transformers import DistilBertTokenizerFast

# Load the pretrained tokenizer for the 'distilbert-base-uncased' checkpoint.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Encode each split with truncation and padding enabled.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
# The validation texts produced by the earlier split are named `val_texts`;
# `valid_texts` was never defined and raised a NameError here.
valid_encodings = tokenizer(val_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)
当我尝试用 BERT 分词器(tokenizer)对从数据帧中拆分出来的文本进行编码时,遇到了如下错误。