0

我想知道如何手动计算当我在 lm 对象上调用 R 中的 vcov() 函数时返回的内容,即方差-协方差矩阵。

对角线条目非常简单,只需取 st。每个参数的误差并将其平方。例如,对于下面的估计,方差-协方差矩阵的条目 (z,z) 只需通过取 z 的估计系数的标准误差 (0.1096658) 并将其平方来计算。但是非对角线条目呢?有人可以提供代码来手动计算这些吗?

# Minimal working example code
library(data.table)
df <- data.table(y = runif(100, 0, 100),
                 x = runif(100, 0, 100),
                 z = runif(100, 0, 100))
coef(lm(y ~ x + z, df)) # coefficients
coef(summary(lm(y ~ x + z, df)))[,2] # standard errors
vcov(lm(y ~ x + z, df)) # variance-covariance matrix

# Example result
coef(lm(y ~ x + z, df))
(Intercept)           x           z 
54.44460066  0.03052479 -0.11197309 

coef(summary(lm(y ~ x + z, df)))[,2]
(Intercept)           x           z 
  8.4668880   0.1077858   0.1096658 

vcov(lm(y ~ x + z, df))
            (Intercept)            x            z
(Intercept)  71.6881931 -0.576781121 -0.585380616
x            -0.5767811  0.011617776 -0.001059506
z            -0.5853806 -0.001059506  0.012026594
4

2 回答 2

0

对于“手动计算”,您可以检查这个


library(data.table)
df     = data.table(y = runif(100, 0, 100),
                    x = runif(100, 0, 100),
                    z = runif(100, 0, 100))
mod    = lm(y ~ ., df)
X      = cbind(1, as.matrix(df[, -1]))
invXtX = solve(crossprod(X))
coef   = invXtX %*% t(X) %*% df$y
resid  = df$y - X %*% coef
df.res = nrow(X) - ncol(X)
manual = invXtX * sum(resid**2)/(df.res)
funct  = vcov(mod)
all.equal(manual, funct, check.attributes = F)# should be TRUE
于 2021-10-01T13:42:57.403 回答
0

查看函数的细节,它似乎是未缩放的协方差乘以 sigma 平方(如果完整情况为假):

suppressMessages(library(data.table))
df <- data.table(y = runif(100, 0, 100),
                 x = runif(100, 0, 100),
                 z = runif(100, 0, 100))
stats:::vcov.summary.lm
#> function (object, complete = TRUE, ...) 
#> .vcov.aliased(object$aliased, object$sigma^2 * object$cov.unscaled, 
#>     complete = complete)
#> <bytecode: 0x5619b28eec88>
#> <environment: namespace:stats>
stats:::.vcov.aliased
#> function (aliased, vc, complete = TRUE) 
#> {
#>     if (complete && NROW(vc) < (P <- length(aliased)) && any(aliased)) {
#>         cn <- names(aliased)
#>         VC <- matrix(NA_real_, P, P, dimnames = list(cn, cn))
#>         j <- which(!aliased)
#>         VC[j, j] <- vc
#>         VC
#>     }
#>     else vc
#> }
#> <bytecode: 0x5619b3644430>
#> <environment: namespace:stats>
vcov(mod<-lm(y ~ x + z, df))
#>             (Intercept)             x             z
#> (Intercept)  54.1734049 -0.4767886045 -0.4498457055
#> x            -0.4767886  0.0089655548  0.0005973927
#> z            -0.4498457  0.0005973927  0.0082190981
summary(mod)$cov.unscaled* summary(mod)$sigma^2
#>             (Intercept)             x             z
#> (Intercept)  54.1734049 -0.4767886045 -0.4498457055
#> x            -0.4767886  0.0089655548  0.0005973927
#> z            -0.4498457  0.0005973927  0.0082190981

reprex 包于 2021-10-01 创建(v2.0.1)

于 2021-10-01T13:37:28.053 回答