请参考数据的输出。您可以直接向下滚动到目标和问题陈述。也许您不需要数据,因为您之前可能遇到过这个问题。
调用所需的库
library(zoo)
library(ggplot2)
library(scales)
library(plotly)
library(ggthemes)
library(forecast)
library(plotly)
library(DescTools)
数据输入
dput(ridership.ts)
structure(c(1709L, 1621L, 1973L, 1812L, 1975L, 1862L, 1940L,
2013L, 1596L, 1725L, 1676L, 1814L, 1615L, 1557L, 1891L, 1956L,
1885L, 1623L, 1903L, 1997L, 1704L, 1810L, 1862L, 1875L, 1705L,
1619L, 1837L, 1957L, 1917L, 1882L, 1933L, 1996L, 1673L, 1753L,
1720L, 1734L, 1563L, 1574L, 1903L, 1834L, 1831L, 1776L, 1868L,
1907L, 1686L, 1779L, 1776L, 1783L, 1548L, 1497L, 1798L, 1733L,
1772L, 1761L, 1792L, 1875L, 1571L, 1647L, 1673L, 1657L, 1382L,
1361L, 1559L, 1608L, 1697L, 1693L, 1836L, 1943L, 1551L, 1687L,
1576L, 1700L, 1397L, 1372L, 1708L, 1655L, 1763L, 1776L, 1934L,
2008L, 1616L, 1774L, 1732L, 1797L, 1570L, 1413L, 1755L, 1825L,
1843L, 1826L, 1968L, 1922L, 1670L, 1791L, 1817L, 1847L, 1599L,
1549L, 1832L, 1840L, 1846L, 1865L, 1966L, 1949L, 1607L, 1804L,
1850L, 1836L, 1542L, 1617L, 1920L, 1971L, 1992L, 2010L, 2054L,
2097L, 1824L, 1977L, 1981L, 2000L, 1683L, 1663L, 2008L, 2024L,
2047L, 2073L, 2127L, 2203L, 1708L, 1951L, 1974L, 1985L, 1760L,
1771L, 2020L, 2048L, 2069L, 1994L, 2075L, 2027L, 1734L, 1917L,
1858L, 1996L, 1778L, 1749L, 2066L, 2099L, 2105L, 2130L, 2223L,
2174L, 1931L, 2121L, 2076L, 2141L, 1832L, 1838L, 2132L), .Tsp = c(1991,
2004.16666666667, 12), class = "ts")
创建 ts 对象的数据框以使用 ggplot
tsd = data.frame(time = as.Date(ridership.ts),
value = as.matrix(ridership.ts))
建立线性模型
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2))
向现有数据框 tsd 添加新列
tsd$linear_fit = as.matrix(ridership.lm$fitted.values)
定义验证和培训期的长度
nValid = 36
nTrain = length(ridership.ts) - nValid
训练数据
train.ts = window(ridership.ts,
start = c(1991, 1),
end = c(1991, nTrain))
验证数据
valid.ts = window(ridership.ts,
start = c(1991, nTrain + 1),
end = c(1991, nTrain + nValid))
建筑模型
ridership.lm = tslm(train.ts ~ trend + I(trend^2))
使用我们的构建模型进行预测
ridership.lm.pred = forecast(ridership.lm, h = nValid, level = 0)
为拟合的模型值制作数据框
tsd_train_model = data.frame(time = as.Date(train.ts),
lm_fit_train = as.matrix(ridership.lm$fitted.values))
为绘图目的制作数据框
forecast_df = data.frame(time = as.Date(valid.ts),
value = as.matrix(ridership.lm.pred$mean))
使用 ggplot 创建绘图
p1 = ggplot(data = tsd,
aes(x = time, y = value)) +
geom_line(color = 'blue') +
ylim(1300, 2300) +
geom_line(data = tsd_train_model,
aes(x = time, y = lm_fit_train),
color = 'red')
p2 = p1 +
geom_line(data = forecast_df,
aes(x = time, y = value),
col = 'red', linetype = 'dotted') +
scale_x_date(breaks = date_breaks('1 years'),
labels = date_format('%b-%y')) +
geom_vline(xintercept = as.numeric(c(tsd_train_model[NROW(tsd_train_model), ]$time, #last date of training period
forecast_df[NROW(forecast_df), ]$time))) #last date of testing period
p3 = p2 +
annotate('text',
x = c(tsd_train_model[NROW(tsd_train_model)/2, ]$time,
forecast_df[NROW(forecast_df) / 2,]$time),
y = 2250,
label = c('Training Period', 'Validation Period'))
目的:我想在预测线的两侧(图中红色虚线)添加5个百分位和95个百分位的预测误差,并对区域进行阴影处理。
我使用分位数来生成预测范围
q = quantile(ridership.lm.pred$residuals, c(.05, .95))
percentile_5 = as.numeric(q[1])
percentile_95 = as.numeric(q[2])
为预测数据添加 5 个百分位和 95 个百分位
yl = forecast_df$value + percentile_5
ym = forecast_df$value + percentile_95
问题:如果我使用下面的命令,那么它不会在整个验证期内显示阴影区域。
p3 + geom_ribbon(data = forecast_df,
aes(ymin = yl,
ymax = ym),
fill="gray30")
NROW(yl)
[1]36
sum(is.na(yl))
[1] 0
NROW(ym)
[1] 36
sum(is.na(ym))
[1] 0
尝试过的事情:如果我用任何其他值替换 ymin 和 ymax 的值,例如如果我使用下面的命令,那么我会得到命令下方显示的图
p3 + geom_ribbon(data = forecast_df,
aes(ymin = rep(1750,36),
ymax = rep(2000,36),
fill="gray30"))
我的问题:
谁能告诉我图 2 中输出背后的原因?为什么 R 给出如图 2 所示的奇怪输出?
谁能帮我用ggplot遮蔽整个区域?