我正在使用 H2O 的 Python 库训练一个 GLM,作为我正在构建的 H2O 集成模型的一部分:
(脚本中的相关片段):
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
# Regularisation strengths handed to the GLM: 30 decreasing values whose
# last/first ratio is 1e-6 (the same value as lambda_min_ratio below).
lambda_values = [
    319.3503133509223,
    198.32195498930167,
    123.16129399741205,
    76.48525015768037,
    47.49863615273374,
    29.49745776759067,
    18.318421016404645,
    11.376049799889723,
    7.064719657533626,
    4.387310597042732,
    2.7245942101039184,
    1.6920191642541902,
    1.050772567007053,
    0.652547566186266,
    0.4052430939917762,
    0.2516628269534475,
    0.1562868791824152,
    0.09705679976763945,
    0.060273916981481074,
    0.03743112359966524,
    0.023245361909429163,
    0.01443576356615629,
    0.008964853743724,
    0.005567326056432596,
    0.003457403802078978,
    0.0021471063360513445,
    0.0013333894107306014,
    0.0008280574142025093,
    0.0005142376830786764,
    0.0003193503133509216,
]

# GLM with 5-fold cross-validation (modulo fold assignment); the
# cross-validation predictions are kept after training.
# NOTE(review): an explicit lambda_ list is passed together with
# lambda_search=True / nlambdas=30 -- confirm against the H2O GLM docs how
# the two are combined.
estimator = H2OGeneralizedLinearEstimator(
    # Cross-validation
    nfolds=5,
    keep_cross_validation_predictions=True,
    fold_assignment='Modulo',
    # Solver and model family link
    solver='COORDINATE_DESCENT',
    link='identity',
    # Regularisation
    alpha=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
    lambda_=lambda_values,
    lambda_search=True,
    nlambdas=30,
    lambda_min_ratio=1.00E-06,
    # Stopping criteria and limits
    max_iterations=300,
    objective_epsilon=0.0001,
    gradient_epsilon=1.00E-06,
    max_active_predictors=5000,
    obj_reg=1.03E-05,
    max_runtime_secs=342.6666667,
)

# Fit on the H2O frame; predictors/response are column selections defined
# elsewhere in the script.
estimator.train(
    x=predictors,
    y=response,
    training_frame=df_h20,
)
我还使用线程池,在包含不同特征组合的其他数据帧上并行运行此训练:
# Run AVM_H2O.regressor once per entry of persona_list, one worker thread
# per entry, and wait for every task to finish.
#
# NOTE(review): each task receives its own number 54320 + index (index starts
# at 1), which looks like a per-task H2O port -- confirm in
# AVM_H2O.regressor. The h2o Python client keeps a single process-wide
# connection, so if every thread connects to a different port the threads may
# overwrite each other's connection; that would be consistent with the
# intermittent "Missing training frame" error. Verify before relying on
# threads here.
worker_count = len(persona_list)
with futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
    # Map each submitted future to the (index, persona) pair it was built from.
    future_list = {
        executor.submit(
            AVM_H2O.regressor,
            area,
            [persona],
            dataset,
            h20_mms_GB,
            timestamp,
            datestring,
            S3_upload_bucket,
            logfile,
            54320 + index,
        ): (index, persona)
        for index, persona in enumerate(persona_list, 1)
    }
    # result() re-raises any exception raised inside a worker.
    for future in futures.as_completed(future_list):
        future.result()
我在许多不同的数据集上多次执行此操作,但这个错误似乎只是随机出现。当我尝试复现该错误时,却无法复现。
完整的错误信息是:
H2OResponseError: ModelBuilderErrorV3 (water.exceptions.H2OModelBuilderIllegalArgumentException):
timestamp = 1583433807040
error_url = '/3/ModelBuilders/glm'
msg = 'Illegal argument(s) for GLM model: GLM_model_python_1583433786455_5. Details: ERRR on field: _train: Missing training frame: py_7_sid_b8c3'
dev_msg = 'Illegal argument(s) for GLM model: GLM_model_python_1583433786455_5. Details: ERRR on field: _train: Missing training frame: py_7_sid_b8c3'
http_status = 412
values = {'messages': [{'_log_level': 1, '_field_name': '_train', '_message': 'Missing training frame: py_7_sid_b8c3'}, {'_log_level': 5, '_field_name': '_balance_classes', '_message': 'Not applicable since class balancing is not required for GLM.'}, {'_log_level': 5, '_field_name': '_max_after_balance_size', '_message': 'Not applicable since class balancing is not required for GLM.'}, {'_log_level': 5, '_field_name': '_class_sampling_factors', '_message': 'Not applicable since class balancing is not required for GLM.'}, {'_log_level': 5, '_field_name': '_tweedie_variance_power', '_message': 'Only applicable with Tweedie family'}, {'_log_level': 5, '_field_name': '_tweedie_link_power', '_message': 'Only applicable with Tweedie family'}, {'_log_level': 5, '_field_name': '_theta', '_message': 'Only applicable with Negative Binomial family'}], 'algo': 'GLM', 'parameters': {'_train': {'name': 'py_7_sid_b8c3', 'type': 'Key'}, '_valid': None, '_nfolds': 5, '_keep_cross_validation_models': True, '_keep_cross_validation_predictions': True, '_keep_cross_validation_fold_assignment': False, '_parallelize_cross_validation': True, '_auto_rebalance': True, '_seed': -1, '_fold_assignment': 'Modulo', '_categorical_encoding': 'AUTO', '_max_categorical_levels': 10, '_distribution': 'AUTO', '_tweedie_power': 1.5, '_quantile_alpha': 0.5, '_huber_alpha': 0.9, '_ignored_columns': None, '_ignore_const_cols': True, '_weights_column': None, '_offset_column': None, '_fold_column': None, '_check_constant_response': True, '_is_cv_model': False, '_score_each_iteration': False, '_max_runtime_secs': 342.6666667, '_stopping_rounds': 3, '_stopping_metric': 'deviance', '_stopping_tolerance': 0.0001, '_response_column': 'price_lr', '_balance_classes': False, '_max_after_balance_size': 5.0, '_class_sampling_factors': None, '_max_confusion_matrix_size': 20, '_checkpoint': None, '_pretrained_autoencoder': None, '_custom_metric_func': None, '_custom_distribution_func': None, '_export_checkpoints_dir': 
None, '_standardize': True, '_useDispersion1': False, '_family': 'gaussian', '_rand_family': None, '_link': 'identity', '_rand_link': None, '_solver': 'COORDINATE_DESCENT', '_tweedie_variance_power': 0.0, '_tweedie_link_power': 1.0, '_theta': 1e-10, '_invTheta': 10000000000.0, '_alpha': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], '_lambda': [319.3503133509223, 198.32195498930167, 123.16129399741205, 76.48525015768037, 47.49863615273374, 29.49745776759067, 18.318421016404645, 11.376049799889723, 7.064719657533626, 4.387310597042732, 2.7245942101039184, 1.6920191642541902, 1.050772567007053, 0.652547566186266, 0.4052430939917762, 0.2516628269534475, 0.1562868791824152, 0.09705679976763945, 0.060273916981481074, 0.03743112359966524, 0.023245361909429163, 0.01443576356615629, 0.008964853743724, 0.005567326056432596, 0.003457403802078978, 0.0021471063360513445, 0.0013333894107306014, 0.0008280574142025093, 0.0005142376830786764, 0.0003193503133509216], '_startval': None, '_calc_like': False, '_random_columns': None, '_missing_values_handling': None, '_prior': -1.0, '_lambda_search': True, '_HGLM': False, '_nlambdas': 30, '_non_negative': False, '_exactLambdas': False, '_lambda_min_ratio': 1e-06, '_use_all_factor_levels': False, '_max_iterations': 300, '_intercept': True, '_beta_epsilon': 0.0001, '_objective_epsilon': 0.0001, '_gradient_epsilon': 1e-06, '_obj_reg': 1.03e-05, '_compute_p_values': False, '_remove_collinear_columns': False, '_interactions': None, '_interaction_pairs': None, '_early_stopping': True, '_beta_constraints': None, '_plug_values': None, '_max_active_predictors': 5000, '_stdOverride': False}, 'error_count': 2}
exception_msg = 'Illegal argument(s) for GLM model: GLM_model_python_1583433786455_5. Details: ERRR on field: _train: Missing training frame: py_7_sid_b8c3'
stacktrace =
water.exceptions.H2OModelBuilderIllegalArgumentException: Illegal argument(s) for GLM model: GLM_model_python_1583433786455_5. Details: ERRR on field: _train: Missing training frame: py_7_sid_b8c3
water.exceptions.H2OModelBuilderIllegalArgumentException.makeFromBuilder(H2OModelBuilderIllegalArgumentException.java:19)
hex.ModelBuilder.trainModelOnH2ONode(ModelBuilder.java:304)
water.api.ModelBuilderHandler.handle(ModelBuilderHandler.java:64)
water.api.ModelBuilderHandler.handle(ModelBuilderHandler.java:17)
water.api.RequestServer.serve(RequestServer.java:471)
water.api.RequestServer.doGeneric(RequestServer.java:301)
water.api.RequestServer.doPost(RequestServer.java:227)
javax.servlet.http.HttpServlet.service(HttpServlet.java:755)
javax.servlet.http.HttpServlet.service(HttpServlet.java:848)
org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:684)
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:501)
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1086)
org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:427)
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1020)
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
water.webserver.jetty8.Jetty8ServerAdapter$LoginHandler.handle(Jetty8ServerAdapter.java:119)
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
org.eclipse.jetty.server.Server.handle(Server.java:370)
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:984)
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1045)
org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:861)
org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:236)
org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
java.base/java.lang.Thread.run(Thread.java:834)
parameters = {'__meta': {'schema_version': 3, 'schema_name': 'GLMParametersV3', 'schema_type': 'GLMParameters'}, 'model_id': None, 'training_frame': None, 'validation_frame': None, 'nfolds': 5, 'keep_cross_validation_models': True, 'keep_cross_validation_predictions': True, 'keep_cross_validation_fold_assignment': False, 'parallelize_cross_validation': True, 'distribution': 'AUTO', 'tweedie_power': 1.5, 'quantile_alpha': 0.5, 'huber_alpha': 0.9, 'response_column': {'__meta': {'schema_version': 3, 'schema_name': 'ColSpecifierV3', 'schema_type': 'VecSpecifier'}, 'column_name': 'price_lr', 'is_member_of_frames': None}, 'weights_column': None, 'offset_column': None, 'fold_column': None, 'fold_assignment': 'Modulo', 'categorical_encoding': 'AUTO', 'max_categorical_levels': 10, 'ignored_columns': None, 'ignore_const_cols': True, 'score_each_iteration': False, 'checkpoint': None, 'stopping_rounds': 3, 'max_runtime_secs': 342.6666667, 'stopping_metric': 'deviance', 'stopping_tolerance': 0.0001, 'custom_metric_func': None, 'custom_distribution_func': None, 'export_checkpoints_dir': None, 'seed': -1, 'family': 'gaussian', 'rand_family': None, 'tweedie_variance_power': 0.0, 'tweedie_link_power': 1.0, 'theta': 1e-10, 'solver': 'COORDINATE_DESCENT', 'alpha': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], 'lambda': [319.3503133509223, 198.32195498930167, 123.16129399741205, 76.48525015768037, 47.49863615273374, 29.49745776759067, 18.318421016404645, 11.376049799889723, 7.064719657533626, 4.387310597042732, 2.7245942101039184, 1.6920191642541902, 1.050772567007053, 0.652547566186266, 0.4052430939917762, 0.2516628269534475, 0.1562868791824152, 0.09705679976763945, 0.060273916981481074, 0.03743112359966524, 0.023245361909429163, 0.01443576356615629, 0.008964853743724, 0.005567326056432596, 0.003457403802078978, 0.0021471063360513445, 0.0013333894107306014, 0.0008280574142025093, 0.0005142376830786764, 0.0003193503133509216], 'lambda_search': True, 'early_stopping': True, 'nlambdas': 30, 
'standardize': True, 'missing_values_handling': 'MeanImputation', 'plug_values': None, 'non_negative': False, 'max_iterations': 300, 'beta_epsilon': 0.0001, 'objective_epsilon': 0.0001, 'gradient_epsilon': 1e-06, 'obj_reg': 1.03e-05, 'link': 'identity', 'rand_link': None, 'startval': None, 'random_columns': None, 'calc_like': False, 'intercept': True, 'HGLM': False, 'prior': -1.0, 'lambda_min_ratio': 1e-06, 'beta_constraints': None, 'max_active_predictors': 5000, 'interactions': None, 'interaction_pairs': None, 'balance_classes': False, 'class_sampling_factors': None, 'max_after_balance_size': 5.0, 'max_confusion_matrix_size': 20, 'max_hit_ratio_k': 0, 'compute_p_values': False, 'remove_collinear_columns': False}
messages = [{'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'ERRR', 'field_name': 'train', 'message': 'Missing training frame: py_7_sid_b8c3'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'balance_classes', 'message': 'Not applicable since class balancing is not required for GLM.'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'max_after_balance_size', 'message': 'Not applicable since class balancing is not required for GLM.'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'class_sampling_factors', 'message': 'Not applicable since class balancing is not required for GLM.'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'tweedie_variance_power', 'message': 'Only applicable with Tweedie family'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'tweedie_link_power', 'message': 'Only applicable with Tweedie family'}, {'__meta': {'schema_version': 3, 'schema_name': 'ValidationMessageV3', 'schema_type': 'ValidationMessage'}, 'message_type': 'TRACE', 'field_name': 'theta', 'message': 'Only applicable with Negative Binomial family'}]
error_count = 2
由于无法复现该错误,我很难弄清楚导致它的原因。任何帮助我都将不胜感激。