您可以把 MCMC 生成的模型参数后验分布代入逻辑函数(logistic function),从而得到预测分布。
例如,如果您的模型公式是 y ~ x1 + x2 + x3,并且您的 MCMC 输出存储在变量 posterior.mcmc 中,那么您可以使用
function(x1, x2, x3) 1 / (1 + exp(-posterior.mcmc %*% rbind(1, x1, x2, x3)))
得到与 predict.glm(., 'response') 类似的分布。
单个输入变量情况的更详细示例:
library(extraDistr)
library(MCMCpack)
# Simulate the predictor: 2000 draws of x, uniform on [-100, 100]
x <- runif(2000, min = -100, max = 100)
# Simulate a response that is logistic in x, with standard-normal noise added
# inside the exponent (i.e. on the log-odds scale)
beta <- 1 / 8
eps <- rnorm(length(x), mean = 0, sd = 1)
# Success probability at x for a given noise term eps.
# Reads the slope `beta` from the enclosing (global) environment.
# p(x, 0) is the noise-free logistic curve.
p <- function(x, eps) 1 / (1 + exp(-beta * x + eps))
p.x <- p(x, eps)
# One Bernoulli draw per observation. rbinom() with size = 1 is a Bernoulli
# draw and is vectorised over `prob`, so no per-element sapply() is needed
# (and no lambda argument shadows the function `p`).
y <- rbinom(length(p.x), size = 1, prob = p.x)
df1 <- data.frame(x, y)
# Maximum-likelihood logistic regression of y on x, for comparison
glm.logistic <- glm(formula = y ~ x, data = df1, family = binomial)
# The MCMC fit yields posterior draws of the model parameters
# instead of a single point estimate
posterior.mcmc <- MCMClogit(formula = y ~ x, data = df1, verbose = 2000)
# Inspect the density of the draws
densplot(posterior.mcmc)
# The posterior draws give a whole distribution of model predictions for each x.
#
# All four functions take an optional `posterior` argument: a matrix-like
# object of draws with one row per draw and columns (intercept, slope).
# It defaults to the global `posterior.mcmc`, so existing calls are unchanged.

# Column-wise quantile band of a matrix of predictions.
#   draws:     matrix, one row per posterior draw, one column per input value
#   low, high: lower and upper quantile probabilities
# Returns a 2-row matrix: row 1 is the `low` quantile and row 2 the `high`
# quantile of each column of `draws`.
.quantile.band <- function(draws, low, high) {
  apply(draws, 2, function(column) quantile(column, c(low, high)))
}

# Linear predictor (log-odds) for every draw and every value of x.
# Returns a matrix with one row per draw and one column per element of x.
predict.y.mcmc <- function(x, posterior = posterior.mcmc) {
  # rbind(1, x) is the 2 x length(x) design matrix: intercept row, then x
  as.matrix(posterior) %*% rbind(1, x)
}

# Predicted probability Pr(y = 1) for every draw and every value of x.
# plogis(z) is 1 / (1 + exp(-z)), evaluated by the stats package.
predict.p.mcmc <- function(x, posterior = posterior.mcmc) {
  plogis(predict.y.mcmc(x, posterior))
}

# [low, high] quantile band of the predicted probability at each x.
interval.p.mcmc <- function(x, low, high, posterior = posterior.mcmc) {
  .quantile.band(predict.p.mcmc(x, posterior), low, high)
}

# [low, high] quantile band of the predicted log-odds at each x.
interval.y.mcmc <- function(x, low, high, posterior = posterior.mcmc) {
  .quantile.band(predict.y.mcmc(x, posterior), low, high)
}
## Plot the data and fits ##
plot(x, p.x, ylab = 'Pr(y=1)', pch = 20, cex = 0.5, main = 'Probability vs x')
# x-values for prediction.
# The MCMC predictions below form a (number of draws) x length(x_test) matrix,
# so the grid size directly drives memory use: a 0.01 step (20001 points)
# needs over a gigabyte per intermediate for 10000 draws. A 0.5 step
# (401 points) is still far finer than the plot can resolve.
x_test <- seq(-100, 100, by = 0.5)
# Blue line is the logistic function we used to generate the data, with noise removed
p_of_x_test <- p(x_test, 0)
lines(x_test, p_of_x_test, col = 'blue')
# Green line is the prediction from logistic regression
lines(x_test, predict(glm.logistic, data.frame(x = x_test), type = 'response'), col = 'green')
# Red lines indicate the range of model predictions from MCMC
# (for each x, 95% of the distribution of model predictions lies between these bounds)
interval.p.mcmc_95 <- interval.p.mcmc(x_test, 0.025, 0.975)
lines(x_test, interval.p.mcmc_95[1, ], col = 'red')
lines(x_test, interval.p.mcmc_95[2, ], col = 'red')
# The same comparison on the log-odds scale; qlogis(p) is log(p / (1 - p))
plot(
  x, qlogis(p.x),
  ylab = 'log[Pr(y=1) / (1 - Pr(y=1))]',
  pch = 20, cex = 0.5, main = 'Log-Odds vs x'
)
# Blue: noise-free generating curve
lines(x_test, qlogis(p_of_x_test), col = 'blue')
# Green: logistic-regression prediction on the link (log-odds) scale
lines(
  x_test,
  predict(glm.logistic, newdata = data.frame(x = x_test), type = 'link'),
  col = 'green'
)
# Red: 2.5% and 97.5% quantiles of the MCMC log-odds predictions at each x
interval.y.mcmc_95 <- interval.y.mcmc(x_test, 0.025, 0.975)
lines(x_test, interval.y.mcmc_95[1, ], col = 'red')
lines(x_test, interval.y.mcmc_95[2, ], col = 'red')