0

我的数据集是包含两个变量 Production 和 Shipment 的每周数据。产量是自变量,出货量是因变量。首先,我试图预测 Production 值并将其用作预测 Shipment 变量的回归量。

如果我使用训练集日期范围 From-> "2018-12-31" To-> "2021-11-22" 运行 Arima,模型会在 10 分钟内运行完毕,我可以看到模型结果。

使用相同的模型,如果我将训练集日期范围扩展为从 “2018-12-31” 到 “2021-12-27”,模型会一直运行、始终无法执行完毕,我也无法查看模型输出。

你能帮我解决这个问题吗?谢谢你的支持

# Raw weekly data set used by everything below, built inline with structure().
# Columns:
#   YearWeek   - character week codes of the form "YYYYWW" ("201901" to "202203")
#   Shipment   - numeric weekly values (used below as the dependent variable)
#   Production - numeric weekly values (used below as the independent variable)
# row.names = c(NA, 160L) declares 160 automatically numbered rows.
Original.df<-structure(list(YearWeek = c("201901", "201902", "201903", "201904", 
"201905", "201906", "201907", "201908", "201909", "201910", "201911", 
"201912", "201913", "201914", "201915", "201916", "201917", "201918", 
"201919", "201920", "201921", "201922", "201923", "201924", "201925", 
"201926", "201927", "201928", "201929", "201930", "201931", "201932", 
"201933", "201934", "201935", "201936", "201937", "201938", "201939", 
"201940", "201941", "201942", "201943", "201944", "201945", "201946", 
"201947", "201948", "201949", "201950", "201951", "201952", "202001", 
"202002", "202003", "202004", "202005", "202006", "202007", "202008", 
"202009", "202010", "202011", "202012", "202013", "202014", "202015", 
"202016", "202017", "202018", "202019", "202020", "202021", "202022", 
"202023", "202024", "202025", "202026", "202027", "202028", "202029", 
"202030", "202031", "202032", "202033", "202034", "202035", "202036", 
"202037", "202038", "202039", "202040", "202041", "202042", "202043", 
"202044", "202045", "202046", "202047", "202048", "202049", "202050", 
"202051", "202052", "202053", "202101", "202102", "202103", "202104", 
"202105", "202106", "202107", "202108", "202109", "202110", "202111", 
"202112", "202113", "202114", "202115", "202116", "202117", "202118", 
"202119", "202120", "202121", "202122", "202123", "202124", "202125", 
"202126", "202127", "202128", "202129", "202130", "202131", "202132", 
"202133", "202134", "202135", "202136", "202137", "202138", "202139", 
"202140", "202141", "202142", "202143", "202144", "202145", "202146", 
"202147", "202148", "202149", "202150", "202151", "202152", "202201", 
"202202", "202203"), Shipment = c(399, 1336, 1018, 1126, 1098, 
1235, 1130, 1258, 897, 1333, 1221, 1294, 1628, 1611, 1484, 1238, 
1645, 1936, 1664, 1482, 2060, 1964, 1875, 1645, 2039, 1640, 733, 
1764, 1639, 1968, 1692, 1677, 1542, 1299, 1328, 1130, 1741, 1929, 
1843, 1427, 1467, 1450, 1041, 1238, 1721, 1757, 1813, 1001, 1208, 
1916, 1435, 540, 681, 1436, 1170, 938, 1206, 1648, 1169, 1311, 
1772, 1333, 1534, 1365, 1124, 846, 732, 753, 1266, 1652, 1772, 
1814, 1649, 1191, 1298, 986, 1296, 1066, 777, 1041, 1388, 1289, 
1097, 1356, 1238, 1732, 1109, 1104, 1155, 1334, 1094, 770, 1411, 
1304, 1269, 1093, 1096, 1121, 943, 695, 1792, 2033, 1586, 768, 
685, 993, 1406, 1246, 1746, 1740, 938, 160, 1641, 1373, 1023, 
1173, 1611, 928, 1038, 1009, 1274, 1369, 1231, 1053, 1163, 880, 
870, 1131, 882, 1143, 632, 394, 510, 543, 535, 824, 874, 591, 
512, 448, 247, 452, 470, 747, 545, 639, 326, 414, 604, 640, 458, 
272, 524, 589, 666, 217, 215, 348, 537, 466), Production = c(794, 
1400, 1505, 1055, 1396, 1331, 1461, 1623, 1513, 1667, 1737, 1264, 
1722, 1587, 2094, 1363, 2007, 1899, 1749, 1693, 1748, 1455, 2078, 
1702, 1736, 1885, 860, 1372, 1716, 1290, 1347, 1451, 1347, 1409, 
1203, 1235, 1397, 1557, 1406, 1451, 1704, 670, 1442, 1336, 1611, 
1401, 1749, 744, 1558, 1665, 1317, 41, 441, 1351, 1392, 1180, 
1447, 1265, 1485, 1494, 1543, 1581, 1575, 1597, 1191, 1386, 889, 
1002, 1573, 1380, 1346, 1243, 1009, 965, 1051, 905, 1094, 1194, 
891, 1033, 921, 880, 1135, 1058, 1171, 1022, 956, 880, 902, 983, 
1014, 945, 1021, 1058, 1191, 1139, 1292, 573, 1173, 514, 1292, 
1310, 1239, 41, 41, 1182, 1028, 1028, 1196, 1214, 1045, 256, 1451, 
1344, 1352, 1257, 1444, 786, 1369, 1185, 1262, 1025, 949, 1051, 
941, 727, 911, 951, 987, 1136, 884, 770, 959, 1102, 1109, 1098, 
988, 983, 1002, 904, 1147, 1149, 919, 1058, 1112, 479, 1028, 
1154, 1126, 1155, 1208, 536, 839, 1178, 1225, 539, 41, 862, 839, 
873)), row.names = c(NA, 160L), class = "data.frame")

# Derive a calendar date for every weekly observation so that 53-week years
# are accommodated: turn "YYYYWW" into the ISO week string "YYYY-Www-1"
# (the trailing 1 selects day 1 of the week), then convert it to a date.
Original.df <- Original.df %>%
  mutate(isoweek = sub("^(\\d{4})(\\d{2})$", "\\1-W\\2-1", YearWeek)) %>%
  mutate(date = ISOweek::ISOweek2date(isoweek))

# Case 1 train/test split: training rows run from "2018-12-31" up to WK47
# ("2021-11-22"); test rows run from "2021-11-29" to "2021-12-27".
Original.train.df <- Original.df %>%
  filter(date >= as.Date("2018-12-31") & date <= as.Date("2021-11-22"))

Original.test.df <- Original.df %>%
  filter(date >= as.Date("2021-11-29") & date <= as.Date("2021-12-27"))

# Test rows with the week/date columns and Production dropped, as a tibble.
Shipment.Test.df <- Original.test.df %>%
  dplyr::select(-c(YearWeek, Production, date, isoweek)) %>%
  as_tibble()

# Reduce the case-1 training data to the week index (Week.1) plus the
# remaining dependent and independent variables, and convert it to a tsibble
# indexed by Week.1.
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)

# Model.1 - ARIMA with Fourier terms fitted to Production, using the training
# data that ends at WK47 (2021-11-22).

# Box-Cox lambda selected by the guerrero feature on the training series.
lambda_production <- Total.train.df %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

bestfit.Prod.1.AICc <- Inf

# Try K = 1..25 in fourier() and keep the fit with the smallest AICc.
for (K in seq_len(25)) {
  fit.Prod.1 <- Total.train.df %>%
    model(ARIMA(box_cox(Production, lambda_production) ~ fourier(K = K), stepwise = FALSE, approximation = FALSE))

  # Read the AICc once per candidate instead of calling glance() twice.
  AICc.Prod.1 <- purrr::pluck(glance(fit.Prod.1), "AICc")

  if (AICc.Prod.1 < bestfit.Prod.1.AICc) {
    bestfit.Prod.1.AICc <- AICc.Prod.1
    bestfit.Prod.1 <- fit.Prod.1
    bestK.Prod.1 <- K
  }
}

# Show the selected K and the summary of the winning model.
bestK.Prod.1
glance(bestfit.Prod.1)

# Case 2 train/test split: training rows run from "2018-12-31" up to WK52
# ("2021-12-27"); test rows run from "2022-01-03" to "2022-01-17".
Original.train.df_2 <- Original.df %>%
  filter(date >= as.Date("2018-12-31") & date <= as.Date("2021-12-27"))

Original.test.df_2 <- Original.df %>%
  filter(date >= as.Date("2022-01-03") & date <= as.Date("2022-01-17"))

# Test rows with the week/date columns and Production dropped, as a tibble.
Shipment.Test.df_2 <- Original.test.df_2 %>%
  dplyr::select(-c(YearWeek, Production, date, isoweek)) %>%
  as_tibble()

# Reduce the case-2 training data to the week index (Week.1) plus the
# remaining dependent and independent variables, and convert it to a tsibble
# indexed by Week.1.
Total.train.df_2 <- Original.train.df_2 %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)


# Model.2 - ARIMA with Fourier terms fitted to Production, using the training
# data that ends at WK52 ("2021-12-27").

# Box-Cox lambda selected by the guerrero feature on the case-2 training series.
lambda_production_2 <- Total.train.df_2 %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

bestfit.Prod.2.AICc <- Inf

# Try K = 1..25 in fourier() and keep the fit with the smallest AICc.
for (K in seq_len(25)) {
  # Fit on the case-2 training set (Total.train.df_2) so that the data matches
  # the lambda estimated above; fitting on the case-1 set would silently
  # reproduce Model 1 with a mismatched transformation.
  fit.Prod.2 <- Total.train.df_2 %>%
    model(ARIMA(box_cox(Production, lambda_production_2) ~ fourier(K = K), stepwise = FALSE, approximation = FALSE))

  AICc.Prod.2 <- purrr::pluck(glance(fit.Prod.2), "AICc")

  # Compare this candidate with the running best of Model 2. Comparing against
  # Model 1's objects would never update bestfit.Prod.2 / bestK.Prod.2 and the
  # lookups after the loop would fail with "object not found".
  if (AICc.Prod.2 < bestfit.Prod.2.AICc) {
    bestfit.Prod.2.AICc <- AICc.Prod.2
    bestfit.Prod.2 <- fit.Prod.2
    bestK.Prod.2 <- K
  }
}

# Show the selected K and the summary of the winning model.
bestK.Prod.2
glance(bestfit.Prod.2)

上述模型 2 始终无法执行完毕,一直处于运行状态。

从上面可以看出,模型 1 和模型 2 除了训练数据之外没有任何区别,所以请您告诉我这里缺少什么。谢谢

4

0 回答 0