1

我正在运行以下 R 代码:

library(readxl)
library(prophet)
# Load the daily history: one date column followed by four numeric columns.
df <- read_excel(
  "20151001-20180531 Data.xlsx",
  col_types = c("date", rep("numeric", 4))
)

# Number of trailing rows held out of training and forecast afterwards.
h <- 365

# Training set = every row except the last h. The target, sessions and price
# are modelled on the log scale; the discount column is used untransformed.
holdout_rows <- (nrow(df) - (h - 1)):nrow(df)
train <- df[-holdout_rows, ]
prodf <- data.frame(
  ds = train$Transaction_Date,
  y = log(train$`Volume Demand`),
  gasessions = log(train$Sessions),
  avgprice = log(train$`Avg RRP`),
  discdepth = train$`Avg Discount`
)

# Earlier hard-coded holiday table, left disabled; the model below takes its
# holidays from Calendar.xlsx instead.
# allholidays=data.frame(holiday="allholidays",
#                     ds=as.Date(c("2015-10-31","2015-11-26","2015-11-27","2015-11-30","2015-12-18","2015-12-24","2015-12-25","2015-12-26","2015-12-31",
#                          "2016-01-01","2016-02-14","2016-03-25","2016-10-31","2016-11-24","2016-11-25","2016-11-28","2016-12-18","2016-12-24","2016-12-25","2016-12-26","2016-12-31",
#                          "2017-01-01","2017-02-14","2017-04-14","2017-10-31","2017-11-23","2017-11-24","2017-11-27","2017-12-16","2017-12-24","2017-12-25","2017-12-26","2017-12-31",
#                          "2018-01-01","2018-02-14","2018-04-1")),
#                     lower_window = 0,
#                     upper_window = 1, stringsAsFactors = F)
calendar <- read_excel(
  "Calendar.xlsx",
  col_types = c("text", "date", "numeric", "numeric")
)

# Build the model without data so that seasonalities and regressors can be
# registered before fitting.
m <- prophet(
  holidays = calendar,
  holidays.prior.scale = 10,
  changepoint.prior.scale = 0.05
) # ,mcmc.samples = 300)
m <- add_seasonality(
  m,
  name = "weekly", period = 7, fourier.order = 5, prior.scale = 10
)
m <- add_seasonality(
  m,
  name = "yearly", period = 365, fourier.order = 5, prior.scale = 10
)
for (regressor in c("gasessions", "avgprice", "discdepth")) {
  m <- add_regressor(m, regressor)
}
m <- fit.prophet(m, prodf)

# Extend the training dates by h periods and attach the regressor values from
# the full data set (training rows plus the held-out rows).
future <- make_future_dataframe(m, periods = h)
future$gasessions <- log(df$Sessions)
future$avgprice <- log(df$`Avg RRP`)
future$discdepth <- df$`Avg Discount`
forecast <- predict(m, future)

# Compare the last h predictions (back-transformed from the log scale) with
# the last h observed values.
predicted_tail <- tail(exp(forecast$yhat), h)
actual_tail <- tail(df$`Volume Demand`, h)

# Mean absolute percentage error over the held-out rows.
MAPE <- mean(abs(predicted_tail - actual_tail) / actual_tail) * 100
# Signed percentage difference between total predicted and total observed.
accuracy <- ((sum(predicted_tail) - sum(actual_tail)) / sum(actual_tail)) * 100
TotForecast <- sum(predicted_tail)

我正在尝试预测未来一年的销售额。遗憾的是,出于显而易见的原因,我无法提供数据。MAPE 为 19%,准确度为 27%。这是我在 365 天预测范围内得到的最佳结果。在 28 天的预测范围内使用不同的设置时,我曾达到 0.5% 的准确度(即 99.5% 的预测正确),但这些设置在 365 天预测范围内的准确度比上面提到的结果更差(可能是过拟合)。有人对改进这个 Prophet 模型有什么建议吗?另外,如果有办法对数据进行转换以便共享,请告诉我。此致敬礼,乔治

4

1 回答 1

0

也许您是对的,模型可能存在过拟合,但您可以通过调整 fourier.order 和 seasonality.prior.scale 来控制它。fourier.order:增大该值可以让模型拟合变化更快的季节性模式。seasonality.prior.scale:它控制对模型季节性成分的正则化强度,而正则化对于避免过拟合很重要。因此,如果确实是过拟合,我建议您减小 fourier.order 和 seasonality.prior.scale。

于 2018-09-19T13:15:29.763 回答