2

在时间序列预测中,外部回归变量可能会对预测结果产生很大影响。目前我想使用 modeltime 框架来考察外部回归变量的影响。

但是,到目前为止,我找不到有关此主题的任何有用信息。我只发现,可以在 recipe 的公式中用“+”添加回归变量。

在我的 recipe 中添加变量 Transactions(每家商店每天的交易数)和 Open_Closed(1 = 商店关闭,0 = 商店营业)后,我发现预测结果没有任何变化。我怎样才能让这些外部回归变量对预测产生影响?

一些可复现的示例数据(reprex):

suppressPackageStartupMessages(library(modeltime))
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(timetk))


#### DATA

# Toy data set: two stores ("1" and "2"), each carrying the same 365 daily
# observations starting on 2013-01-01.

# Seed the RNG *before* sample() so that Open_Closed -- and therefore the
# whole example -- is reproducible from run to run.
set.seed(234)

# One calendar year of daily dates, shared by both stores.
dates <- seq(as.Date("2013-01-01"), by = "day", length.out = 365)

data <- data.frame(
  Store = rep(c("1", "2"), each = 365),
  # The per-store series are repeated explicitly rather than relying on
  # data.frame() silently recycling length-365 vectors to 730 rows.
  Sales = rep(seq(1, 44, length.out = 365), times = 2),
  Date = rep(dates, times = 2),
  Transactions = rep(seq(50, 100, length.out = 365), times = 2),
  # 1 = store closed, 0 = store open: 365 of each, randomly shuffled.
  Open_Closed = sample(rep(0:1, each = 365))
)

# Forecast horizon in days.
h <- 42

# split
# Hold out the last `h` days as the assessment set; cumulative = TRUE keeps
# all earlier observations in the training set.
# NOTE(review): Store is not part of the model formula below, so the rows of
# both stores (which share the same dates) are treated as a single series --
# confirm this is intended.
set.seed(234)
splits <- time_series_split(
  data,
  date_var = Date,
  assess = paste(h, "days"),
  cumulative = TRUE
)

# recipe
# Sales is modelled from calendar features derived from Date plus the two
# external regressors Transactions and Open_Closed.
recipe_spec <- recipe(Sales ~ Date + Transactions + Open_Closed, data = data) %>%
  # Expand Date into calendar/time-series signature features.
  step_timeseries_signature(Date) %>%
  # Drop the signature columns whose names match these patterns.
  step_rm(matches("(iso$)|(xts$)|(day)|(hour)|(min)|(sec)|(am.pm)")) %>%
  # Dummy-encode nominal *predictors* only, so an outcome or ID column can
  # never be encoded by accident.
  step_dummy(all_nominal_predictors())

# Inspect the processed data; bake(new_data = NULL) replaces the superseded
# juice().
recipe_spec %>%
  prep() %>%
  bake(new_data = NULL)


#### MODELS

# elnet
# Penalised linear regression fitted through the glmnet engine.
# NOTE(review): a penalty of 1 is presumably strong enough to shrink weak
# regressors all the way to zero, which could explain why the external
# regressors appear to have no effect -- confirm by inspecting the fitted
# coefficients.
model_spec_glmnet <- set_engine(linear_reg(penalty = 1), "glmnet")

# The raw Date column is removed from the recipe before fitting; the workflow
# is then trained on the training portion of the split.
wflw_fit_glmnet <- workflow() %>%
  add_recipe(step_rm(recipe_spec, Date)) %>%
  add_model(model_spec_glmnet) %>%
  fit(data = training(splits))

# xgboost
# Boosted-tree regression model (learning rate 0.35) on the xgboost engine.
model_spec_xgboost <- set_engine(
  boost_tree(mode = "regression", learn_rate = 0.35),
  "xgboost"
)

# Seed set right before fitting, presumably so the boosted fit is repeatable.
set.seed(123)

# Same preprocessing as the glmnet workflow: the raw Date column is removed
# and the model is trained on the training portion of the split.
wflw_fit_xgboost <- workflow() %>%
  add_recipe(step_rm(recipe_spec, Date)) %>%
  add_model(model_spec_xgboost) %>%
  fit(data = training(splits))

# sub tbl
# Collect both fitted workflows in a single modeltime table.
submodels_tbl <- modeltime_table(wflw_fit_glmnet, wflw_fit_xgboost)

# Compute accuracy metrics on the held-out test set and render them as a
# static (non-interactive) table.
table_modeltime_accuracy(
  modeltime_accuracy(submodels_tbl, new_data = testing(splits)),
  .interactive = FALSE
)
4

0 回答 0