0

我建立了很多 GLM。通常在具有许多模型参数的大型数据集上。这意味着 base R 的glm()函数并不是真正有用,因为它无法处理大小/复杂性,所以我通常使用它revoScaleR::rxGlm()

但是,我希望能够对嵌套模型对进行 ANOVA 测试,但我还没有找到一种方法来对rxGlm()创建的模型对象执行此操作,因为 R 的anova()函数不适用于它们。revoScaleR提供了一个将对象as.glm()转换为rxGlm()对象的glm()函数 - 有点 - 但它在这里不起作用。

例如:

library(dplyr)

data(mtcars)

# don't like having named rows

mtcars <- mtcars %>% 
  mutate(veh_name = rownames(.)) %>%
  select(veh_name, everything())

# fit a GLM: mpg ~ everything else

glm_a1 <- glm(mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
             data = mtcars,
             family = gaussian(link = "identity"),
             trace = TRUE)

summary(glm_a1)

# fit another GLM where gear is removed

glm_a2 <- glm(mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + carb,
             data = mtcars,
             family = gaussian(link = "identity"),
             trace = TRUE)

summary(glm_a2)

# F test on difference

anova(glm_a1, glm_a2, test = "F")

工作正常,但如果我这样做:

library(dplyr)

data(mtcars)

# don't like having named rows

mtcars <- mtcars %>% 
  mutate(veh_name = rownames(.)) %>%
  select(veh_name, everything())

glm_b1 <- rxGlm(mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
               data = mtcars,
               family = gaussian(link = "identity"),
               verbose = 1)

summary(glm_b1)

# fit another GLM where gear is removed

glm_b2 <- rxGlm(mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + carb,
               data = mtcars,
               family = gaussian(link = "identity"),
               verbose = 1)

summary(glm_b2)

# F test on difference

anova(as.glm(glm_b1), as.glm(glm_b2), test = "F")

我看到错误消息:

Error in qr.lm(object) : lm object does not have a proper 'qr'
component. Rank zero or should not have used lm(.., qr=FALSE)

同样的问题出现在之前的 SO 帖子中:Error convert rxGlm to GLM but not似乎已经解决了。

有人可以帮忙吗?如果as.glm()在这里没有帮助,还有其他方法吗?我可以编写一个自定义函数来执行此操作(将我的编码能力扩展到我怀疑的极限!)?

此外,SO 是最好的论坛,还是其他 StackExchange 论坛之一是寻求指导的更好地方?

谢谢你。

4

1 回答 1

0

部分解决...

my_anova <- function (model_1, model_2, test_type)
{
  
  # only applies for nested GLMs. How do I test for this?
  
  cat("\n")
  
  if(test_type != "F")
  {
    cat("Invalid function call")
  }
  else
  {
    # display model formulae
    
    cat("Model 1:", format(glm_b1$formula), "\n")
    cat("Model 2:", format(glm_b2$formula), "\n")
    
    if(test_type == "F") 
    {
      
      if (model_1$df[2] < model_2$df[2]) # model 1 is big, model 2 is small
        
      {
        dev_s <- model_2$deviance
        df_s  <- model_2$df[2]
        dev_b <- model_1$deviance
        df_b  <- model_1$df[2]
      }
      else # model 2 is big, model 1 is small
      {
        dev_s <- model_1$deviance
        df_s  <- model_1$df[2]
        dev_b <- model_2$deviance
        df_b  <- model_2$df[2]
      }
      
      F <- (dev_s - dev_b) / ((df_s - df_b) * dev_b / df_b)
      
    }
    
    # still need to calculate the F tail probability however
    
    # df of F: numerator: df_s - df_b
    # df of F: denominator: df_b
    
    F_test <- pf(F, df_s - df_b, df_b, lower.tail = FALSE)
    
    cat("\n")
    cat("F:     ", round(F, 4), "\n")
    cat("Pr(>F):", round(F_test, 4))
  }
}
于 2020-08-22T12:13:20.620 回答