0

我正在尝试使用 GridSearchCV 获取随机森林回归器的最佳参数,但是每次运行代码时,我都会得到不同的最佳参数集。

我尝试将 random_state=42 传给 GridSearchCV,但它不接受这个参数。

from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
import scipy
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the "Set1" worksheet; column 'b' is the single feature and 'd' the target.
df = pd.read_excel(r'Data_sets.xlsx', sheet_name="Set1")
# Wrap the feature column in a list and transpose so X is a 2-D column
# matrix (one row per sample, one column for the feature).
X = np.array([df['b']]).T
y = df['d'].to_numpy(copy=True)


def evaluate(model, test_features, test_labels):
    """Print and return the accuracy (100 - MAPE) of a fitted model.

    Args:
        model: Any object exposing ``predict(features)``.
        test_features: Feature matrix handed to ``model.predict``.
        test_labels: True target values. They must be non-zero, because
            the percentage error divides by their magnitude.

    Returns:
        The accuracy in percent, computed as ``100 - MAPE``.
    """
    test_labels = np.asarray(test_labels)
    predictions = np.asarray(model.predict(test_features))
    errors = np.abs(predictions - test_labels)
    # Divide by the magnitude of the labels: with the signed value, negative
    # targets yield negative percentage errors that cancel positive ones and
    # make the reported accuracy meaningless.
    mape = 100 * np.mean(errors / np.abs(test_labels))
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))  # Mean Absolute Error
    print('Accuracy = {:0.2f}%.'.format(accuracy))

    return accuracy

def rfr_model(X, y, random_state=42):
    """Grid-search a random forest regressor and report hold-out accuracy.

    The data is split 75/25 into training and test sets, a grid search over
    ``max_depth`` and ``n_estimators`` is run on the training part with
    10-fold cross-validation, and the best estimator is printed and scored
    on the test part with ``evaluate``.

    Args:
        X: Feature matrix.
        y: Target values.
        random_state: Seed used for both the train/test split and the
            random forest, so repeated runs select the same parameters.
    """
    # Split the data into training and testing sets
    train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=0.25, random_state=random_state
    )

    # Perform Grid-Search.
    # GridSearchCV itself takes no random_state; the run-to-run variation
    # comes from the forest's own randomness, so the seed belongs on the
    # estimator. Without it every run fits different trees and the "best"
    # parameters change between runs.
    rf_random = GridSearchCV(
        estimator=RandomForestRegressor(random_state=random_state),
        param_grid={
            'max_depth': (10, 30, 50, 80, 100, 150, 200, 250, 300),
            'n_estimators': (10, 50, 100, 200, 300, 400, 500, 1000),
        },
        cv=10,
        scoring='neg_mean_absolute_error',
        verbose=0,
        n_jobs=-1,
    )

    # Fit the model
    rf_random.fit(train_features, train_labels)
    best_random = rf_random.best_estimator_
    print(best_random)
    random_accuracy = evaluate(best_random, test_features, test_labels)
    print(random_accuracy)


# Run the grid search on the loaded data; the best estimator and its
# accuracy are printed by rfr_model.
rfr_model(X, y)

我想知道为什么每次运行代码时都会得到不同的 n_estimators 和 max_depth。我的代码中是否有导致此问题的错误?希望有人可以指导我,因为这是我第一次处理回归问题。

4

0 回答 0