s_yhat 的公式表明,所要求的区间是针对均值(平均响应)的,而不是针对单个观测值的。
在这种情况下,predict 函数中应使用的正确参数是 interval="confidence"。
见下文:
library(gplots) # plotCI
# Simulate a simple linear regression, fit it with lm(), and draw the 95%
# confidence interval for the mean response at every observed x.

# Simulated data: y = 2 + 3 * x + e, with x ~ N(0, 1) and e ~ N(0, sd = 4).
# x is drawn before e; keep this order so seeded runs stay reproducible.
data <- data.frame(x = rnorm(100, 0, 1))
e <- rnorm(100, 0, 4)
data$y <- 2 + 3 * data$x + e

# Scatter plot of the simulated sample.
plot(data$x, data$y, xlab = "x", ylab = "y", pch = 20)

# Ordinary least squares fit of y on x; print() is explicit so the output
# is shown regardless of how the script is run.
estimation_lm <- lm(y ~ x, data)
print(summary(estimation_lm))
print(coef(estimation_lm))

# Fitted line (solid black) versus the true line y = 2 + 3 * x (dotted red).
abline(estimation_lm)
abline(a = 2, b = 3, col = "red", lty = "dotted")

# interval = "confidence" requests the interval for the mean response, not
# the (wider) prediction interval for a single new observation.
# NOTE(review): this variable shadows stats::predict by name. Calls to
# predict() still resolve to the function, but a distinct name would be
# clearer if nothing else in the file depends on it.
predict <- predict(
  estimation_lm, data,
  interval = "confidence", level = 0.95
)

# Column 1 is the fitted value, columns 2 and 3 are the lower and upper
# confidence limits; draw them as vertical bars on top of the existing plot.
plotCI(
  data$x, predict[, 1],
  li = predict[, 2], ui = predict[, 3],
  add = TRUE, col = "blue", gap = 0, pch = NA_integer_
)

# The legend line types must mirror what was drawn above: the true line is
# drawn with lty = "dotted".
legend(
  "bottomright",
  legend = c("estimated regression", "true line", "confidence interval 95%"),
  lty = c("solid", "dotted", "solid"),
  col = c("black", "red", "blue")
)