这是我的代码。
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_graphviz, export_text
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_curve, auc, f1_score, roc_auc_score
import warnings; warnings.simplefilter('ignore')
data_files = 'dataset_for_learning_decision_tree.xlsx'
data = pd.read_excel(data_files)
train_data = data[['title','category','processed_title']]
categories=train_data['category']
labels=list(set(categories))
X_train, X_test, y_train, y_test = train_test_split(train_data['processed_title'],train_data['category'],test_size=0.2,random_state=57)
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(X_train)
decisiontree=DecisionTreeClassifier()
model = Pipeline([('vect', vectorizer),
('tfidf', TfidfTransformer()),
('clf', decisiontree),
])
model.fit(X_train,y_train)
predicted = model.predict(X_test)
confusion_matrix(y_test,predicted)
print('accuracy_score',accuracy_score(y_test,predicted))
print('Reporting...')
print(classification_report(y_test,predicted))
import numpy as np
from mlxtend.plotting import plot_decision_regions
X=np.array(X_train)
y=np.array(y_train)
plot_decision_regions(X=X,
y=y,
clf=model.named_steps['clf'])
我想画一个 plot_decision_region。但是,当我执行此代码时,我得到了与标题相同的错误。使用 y=y.astype(np.integer) 运行时,我收到诸如 ValueError: invalid literal for int() with base 10: 'depression' 之类的错误。我应该如何解决它?