我正在尝试使用 GridSearchCV 获取随机森林回归器的最佳参数,但是每次运行代码时,我都会得到不同的最佳参数集。
我尝试将 random_state=42 添加到 GridSearchCV,但似乎不可接受。
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
import scipy
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_excel (r'Data_sets.xlsx',sheet_name="Set1")
X = np.transpose([df['b']])
y = np.array(df['d'])
def evaluate(model, test_features, test_labels):
predictions = model.predict(test_features)
errors = abs(predictions - test_labels)
mape = 100 * np.mean(errors / test_labels)
accuracy = 100 - mape
print('Model Performance')
print('Average Error: {:0.4f} degrees.'.format(np.mean(errors))) #Mean Absolute Error
print('Accuracy = {:0.2f}%.'.format(accuracy))
return accuracy
def rfr_model(X, y):
# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(X, y, test_size = 0.25, random_state = 42)
# Perform Grid-Search
rf_random = GridSearchCV(
estimator=RandomForestRegressor(),
param_grid={
'max_depth': (10,30,50,80,100,150,200,250,300),
'n_estimators': (10, 50, 100,200,300,400,500,1000),
},
cv=10, scoring='neg_mean_absolute_error', verbose=0,n_jobs=-1)
#fit the model
rf_random.fit(train_features, train_labels)
best_random = rf_random.best_estimator_
print(best_random)
random_accuracy = evaluate(best_random, test_features, test_labels)
print(random_accuracy)
rfr_model(X, y)
我想知道为什么每次运行代码时我都会得到不同的 n_estimators 和 max_depth。我是否有导致问题的错误?我希望有人可以指导我,因为这是我第一次处理回归。