我尝试使用 Hyperopt 优化器调整 scikit GradientBoostingRegressor 模型的超参数。我通过多种方式在 [0.01, 1] 范围内设置 learning_rate 参数的搜索空间(例如:
'learning_rate': hp.quniform('learning_rate', 0.01, 1, 0.05)
或作为简单数组[0.01, 0.02, 0.03, 0.1]
,但是当我运行代码 hyperopt 开始计算时,我收到错误“ValueError:learning_rate must be greater than 0 but was 0”。
我不知道代码中有什么问题,因为零值不在参数的范围内。零值如何发挥作用?
请帮我解决这个问题。
先感谢您。
下面我附上相关代码。注意: RandomForestRegressor 和 ExtraTreesRegressor 方法(估计器)的代码没有问题。
# Varying parameter definition
varying_parameter='All - by Hyperopt'
varying_parameter_item = 'Complete search space'
# Optimization function definition
def gb_mse_cv(params, random_state=random_state, cv=kf, X=train_features, y=train_labels):
# the function gets a set of variable parameters in "param"
params = {'n_estimators': int(params['n_estimators']),
'max_depth': int(params['max_depth']),
'min_samples_split': int(params['min_samples_split']),
'max_features': int(params['max_features']),
'learning_rate': int(params['learning_rate']),
'min_samples_leaf': int(params['min_samples_leaf']),
'subsample': int(params['subsample'])}
# we use this params to create a new LGBM Regressor
model = GradientBoostingRegressor(**params)
# and then conduct the cross validation with the same folds as before
score = -cross_val_score(model, X, y, cv=cv, scoring="neg_mean_absolute_error", n_jobs=-1).mean()
return score
# possible values of parameters
space={'n_estimators': hp.quniform('n_estimators', 50, 250, 1),
'max_depth' : hp.quniform('max_depth', 2, 20, 1),
'min_samples_split' : hp.quniform('min_samples_split', 2, 10, 1),
'max_features' : hp.quniform('max_features', 1, train_features.shape[1], 1),
'learning_rate': hp.quniform('learning_rate', 0.01, 1, 0.05),
'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 10, 1),
'subsample': hp.quniform('subsample', 0.6, 1, 0.1)}
# This will contain contain logging information
trials = Trials()
best=fmin(fn=gb_mse_cv, # function to optimize
space=space,
algo=tpe.suggest, # optimization algorithm, hyperotp will select its parameters automatically
max_evals=n_iter, # maximum number of iterations
trials=trials, # logging
rstate=np.random.RandomState(random_state) # fixing random state for the reproducibility
)
print("Best MAE {:.3f} params {}".format( gb_mse_cv(best), best))
# computing the score on the test set
model = GradientBoostingRegressor(random_state=random_state, n_estimators=int(best['n_estimators']), max_depth=int(best['max_depth']),
min_samples_split=int(best['min_samples_split']), max_features=int(best['max_features']))
# Opening output .csv log file
of_connection=open(out_file, 'a')
writer=csv.writer(of_connection)
# Print model parameters
print(' ')
print(model.get_params())
t0=time.time()
# Train the model on training data
model.fit(train_features, train_labels);
# Train time calculation
train_time=time.time()-t0
t0=time.time()
# Test the model on test data
predictions = model.predict(test_features)
# Test time calculation
test_time=time.time()-t0
# Print train and test time
print('Train time=:', round(train_time, 2), ' s')
print('Test time=:', round(test_time, 2), ' s')
# Model metrics calculations
MAE=mean_absolute_error(test_labels, predictions)
MSE=mean_squared_error(test_labels, predictions)
RMSE=math.sqrt(MSE)
R2=r2_score(test_labels, predictions)
errors = abs(predictions - test_labels)
mape = 100 * (errors / test_labels)
accuracy = 100 - np.mean(mape)
# K-Fold cross-validation
cv_score=-cross_val_score(model, train_features, train_labels, cv=kf, scoring="neg_mean_absolute_error", n_jobs=-1).mean()
# Model metrics print
print('MAE=:', round(MAE, 6), 'W.')
print('MSE=:', round(MSE, 6), 'W.')
print('RMSE=:', round(RMSE, 6), 'W.')
print('R2=:', round(R2, 6), '.')
print('Accuracy (100-MAPE))=', round(accuracy, 6), '%.')
print('Cross-validation MAE score=', round(cv_score, 6), ' W.')
# Model parameters importances definitions
Irradiance_importance = model.feature_importances_[0]
Temperature_importance = model.feature_importances_[1]
Clearness_index_importance = model.feature_importances_[2]
Hour_of_day_importance = model.feature_importances_[3]
Previous_power_importance = model.feature_importances_[4]
# Model parameters importances print
print(model.feature_importances_)
# Write results to .csv log output file
writer.writerow([datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), model_code, method, varying_parameter,
model.get_params()['loss'], model.get_params()['learning_rate'],
model.get_params()['n_estimators'], model.get_params()['subsample'],
model.get_params()['criterion'], model.get_params()['min_samples_split'],
model.get_params()['min_samples_leaf'], model.get_params()['min_weight_fraction_leaf'],
model.get_params()['max_depth'], model.get_params()['min_impurity_decrease'],
model.get_params()['min_impurity_split'], model.get_params()['init'],
model.get_params()['random_state'], model.get_params()['max_features'],
model.get_params()['alpha'], model.get_params()['verbose'],
model.get_params()['max_leaf_nodes'], model.get_params()['warm_start'],
model.get_params()['presort'], model.get_params()['validation_fraction'],
model.get_params()['n_iter_no_change'], model.get_params()['tol'],
MAE, MSE, RMSE, R2, accuracy, cv_score,
Irradiance_importance, Temperature_importance,
Clearness_index_importance, Hour_of_day_importance, Previous_power_importance,
train_time, test_time])
print('Finish case ', varying_parameter, '= ', varying_parameter_item)
print('-----------------------------------------------------')
print(' ')