2

我正在使用随机森林进行预测,我想知道 y 代码有什么问题,并且编码是否正确完成`

 import warnings
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 warnings.filterwarnings("ignore")

 train = pd.io.parsers.read_csv("train.csv")
 test = pd.io.parsers.read_csv('train.csv')

 train.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
 train = train.replace('�', 0, regex=True)

 test.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
 test = test.replace('�', 0, regex=True)

 # train_Y = le.transform(train_Y[VCH_CATG].astype(str))
 # train_Y = train.VCH_CATG
 train_predictor_columns = ['COLOUR', 'FUEL', 'MAKER', 'MAKER_MODEL', 'MANU_YEAR', 
 'MODEL_NAME','OWNER CODE','OWNER SR','PURCHASE DATE', 'REGN_DATE', 'REGN_TYPE','RTO_CD']
 train_X = train[train_predictor_columns]
 test_X = test[train_predictor_columns]

 # label encode the categorical values and convert them to numbers
 le = LabelEncoder()
 le.fit(train['VCH_CATG'].astype(str))
 train_Y = le.transform(train['VCH_CATG'].astype(str))
 for i in train_predictor_columns:
     le.fit(train_X[i].astype(str))
     train_X[i] = le.transform(train_X[i].astype(str))
     test_X[i] = le.transform(test_X[i].astype(str))

 # train the model
 my_model = RandomForestRegressor()
 my_model.fit(train_X, train_Y)

 predictions = my_model.predict(test_X)
 print(predictions)
 print("-------------------------------------------------------------")
 print(list(le.inverse_transform(predictions)))

`请不要分享我尝试过但没有帮助我的其他stackoverflow链接,请不要复制粘贴官方文档中的示例

4

0 回答 0