我的数据位于此处:data_tbl.xlsx我无法在此处上传数据或不知道如何上传。
问题是我在尝试将训练数据拟合到工作流程时遇到错误。我不明白这个问题,也不明白为什么我会得到它。
这是我的榨汁数据(recipe_num_only 配方):juuded_recipe.xlsx
这是我的拆分对象:
splits <- initial_time_split(
data_final_tbl
, prop = 0.8
, cumulative = TRUE
)
这是我的食谱(有问题的是 recipe_num_only)
# Features ----------------------------------------------------------------
recipe_base <- recipe(value ~ ., data = training(splits))
recipe_date <- recipe_base %>%
step_timeseries_signature(date_col) %>%
step_rm(matches("(iso$)|(xts$)|(hour)|(min)|(sec)|(am.pm)")) %>%
step_normalize(contains("index.num"), contains("date_col_year"))
recipe_fourier <- recipe_date %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
step_fourier(date_col, period = 365/12, K = 1) %>%
step_YeoJohnson(value, limits = c(0,1))
recipe_fourier_final <- recipe_fourier %>%
step_nzv(all_predictors())
recipe_pca <- recipe_base %>%
step_timeseries_signature(date_col) %>%
step_rm(matches("(iso$)|(xts$)|(hour)|(min)|(sec)|(am.pm)")) %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
step_normalize(value) %>%
step_fourier(date_col, period = 365/52, K = 1) %>%
step_normalize(all_numeric_predictors()) %>%
step_nzv(all_predictors()) %>%
step_pca(
all_numeric_predictors(), -date_col_index.num
, threshold = .95
)
recipe_num_only <- recipe_pca %>%
step_rm(-all_numeric_predictors())
这是我的 XGBoost 模型规格
# XGBoost -----------------------------------------------------------------
model_spec_boost <- boost_tree(
mode = "regression"
# , mtry = 25
# , trees = 25
# , min_n = 10
# , tree_depth = 2
# , learn_rate = 0.3
# , loss_reduction = 0.01
) %>%
set_engine("xgboost")
# * * Testing ----
set.seed(123)
workflow() %>%
add_model(model_spec_boost) %>%
add_recipe(recipe_num_only) %>%
fit(training(splits))
# * * End Test ----
我得到的错误如下:
> workflow() %>%
+ add_model(model_spec_boost) %>%
+ add_recipe(recipe_num_only) %>%
+ fit(training(splits))
Error in setinfo.xgb.DMatrix(dmat, names(p), p[[1]]) :
The length of labels must equal to the number of rows in the input data
Timing stopped at: 0 0 0
一切正常,直到我到达fit(training(splits))
> workflow() %>%
+ add_model(model_spec_boost) %>%
+ add_recipe(recipe_num_only)
== Workflow ==========================================================================================
Preprocessor: Recipe
Model: boost_tree()
-- Preprocessor --------------------------------------------------------------------------------------
9 Recipe Steps
* step_timeseries_signature()
* step_rm()
* step_dummy()
* step_normalize()
* step_fourier()
* step_normalize()
* step_nzv()
* step_pca()
* step_rm()
-- Model ---------------------------------------------------------------------------------------------
Boosted Tree Model Specification (regression)
Computational engine: xgboost
在这里有点失落