0

我想用自己的数据替换鸢尾花(Iris)数据集。请告诉我需要按哪些步骤来实现?谢谢。

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import sklearn 
from sklearn.cluster import KMeans 
from mpl_toolkits.mplot3d import Axes3D 
from sklearn.preprocessing import scale 
import sklearn.metrics as sm 
from sklearn import datasets 
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Default figure size applied to plots created after this call.
plt.rc('figure', figsize=(7, 4))

# Load the Iris dataset shipped with scikit-learn.
iris = datasets.load_iris()
# Features are passed through sklearn.preprocessing.scale before clustering.
X = scale(iris.data)
# Ground-truth labels, kept in a single-column DataFrame.
Y = pd.DataFrame(iris.target)
variable_name = iris.feature_names
# Notebook-style peek at the first ten rows; has no effect when run as a script.
X[0:10, ]

# Three clusters, with a fixed seed so the result is reproducible.
clustering = KMeans(n_clusters=3, random_state=5)
clustering.fit(X)

# Unscaled measurements with readable column names.
iris_df = pd.DataFrame(iris.data)
iris_df.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
Y.columns = ['Targets']
4

2 回答 2

0
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import sklearn 
from sklearn.cluster import KMeans 
from mpl_toolkits.mplot3d import Axes3D 
from sklearn.preprocessing import scale 
import sklearn.metrics as sm 
from sklearn import datasets 
from sklearn.metrics import confusion_matrix,classification_report   

# --- Changed section: load your own data in place of the iris dataset ---
df = pd.read_excel('tmp.xlsx')
# The 'target' column holds the labels; every other column is a feature.
Y = df['target']
X = df.drop(columns=['target'])
# --- End of changed section ---

variable_name = X.columns
# KMeans.fit returns the fitted estimator, so building and fitting can be chained.
clustering = KMeans(n_clusters=3, random_state=5).fit(X)
于 2019-08-28T12:12:21.970 回答
0

导入部分将保持不变。

假设您有一个数据框:

# Load your own dataset (pandas provides readers for several file formats).
df = pd.read_csv('test.csv')
# Separate the label column (called 'target' here) from the feature columns.
Y = df['target']
X = df.drop(columns=['target'])
# Fit k-means on the features; fit() returns the fitted estimator itself.
clustering = KMeans(n_clusters=3, random_state=5).fit(X)

再想一想:你用 k-means 是要做什么?它是一种无监督学习方法,本来就没有目标变量,那你具体想实现什么?通常的写法应该是:

df = pd.read_csv('test.csv')
# Names of the columns to cluster on.
relevant_columns = ['A', 'B']
X = df.loc[:, relevant_columns]
# No target column is involved: k-means is fitted on the selected features only.
clustering = KMeans(n_clusters=3, random_state=5).fit(X)
于 2019-08-28T12:17:36.233 回答