1

我被要求通过使用 zoo 包中的 na.locf() 函数来使用 LOCF 和 NOCB 方法估算数据集,我现在正在尝试绘制观察值和估算值。我正在使用的数据集如下:

structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), 
    sex = c("F", "F", NA, "F", "F", "F", "F", "F", "F", "F", 
    "F", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", 
    "M", "M", "M", "M", "M"), d8 = c(21, 21, NA, 23.5, 21.5, 
    20, 21.5, 23, NA, 16.5, 24.5, 26, 21.5, 23, 25.5, 20, 24.5, 
    22, 24, 23, 27.5, 23, 21.5, 17, 22.5, 23, 22), d10 = c(20, 
    21.5, 24, 24.5, 23, 21, 22.5, 23, 21, 19, 25, 25, 22.5, 22.5, 
    27.5, 23.5, 25.5, 22, 21.5, 20.5, 28, 23, 23.5, 24.5, 25.5, 
    24.5, 21.5), d12 = c(21.5, 24, NA, 25, 22.5, 21, 23, 23.5, 
    NA, 19, 28, 29, 23, NA, 26.5, 22.5, 27, 24.5, 24.5, 31, 31, 
    23.5, 24, 26, 25.5, 26, 23.5), d14 = c(23, 25.5, 26, 26.5, 
    23.5, 22.5, 25, 24, 21.5, 19.5, 28, 31, 26.5, 27.5, 27, 26, 
    28.5, 26.5, 25.5, 26, 31.5, 25, 28, 29.5, 26, 30, 25)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -27L), spec = structure(list(
    cols = list(id = structure(list(), class = c("collector_double", 
    "collector")), sex = structure(list(), class = c("collector_character", 
    "collector")), d8 = structure(list(), class = c("collector_double", 
    "collector")), d10 = structure(list(), class = c("collector_double", 
    "collector")), d12 = structure(list(), class = c("collector_double", 
    "collector")), d14 = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1L), class = "col_spec"))

我通过将原始宽格式转换为长格式来估算缺失值,并遵循其余步骤:

data_long <-  tidyr::gather(dati, age, measurements, d8:d14, factor_key = TRUE)

data_locf <- data_long

locf <- na.locf(data_locf$measurements, na.rm = T, fromLast = F)
nocb <- na.locf(data_locf$measurements, na.rm = T, fromLast = T)

data_locf$measurements = ifelse(data_locf$age == 'd12', locf, nocb)

data_locf$sex = na.locf(data_locf$sex, na.rm = T, fromLast = T)

data_complete = complete(data = data_locf, fill = c(data_locf$measurements, data_locf$sex))

有没有人知道如何将估算值与观察值一起以图形方式绘制?我在这里向您介绍了一些我被推荐使用的功能,并且我已经开始对其进行适当的修改,但是没有成功。

#1 plot    
par(mfrow=c(1,1))
    measurements <- data_complete$measurements
    locf <- function(x) {
      a <- x[1]
      for (i in 2:length(x)) {
        if (is.na(x[i])) x[i] <- a
        else a <- x[i]
      }
      return(x)
    }
    meas1 <- na.locf(measurements)
    colvec <- ifelse(is.na(measurements),mdc(2),mdc(1))
    plot(measurements,col=colvec,type="l",xlab= 'sex' ,ylab="measurements")
    points(measurements, col=colvec,pch=20,cex=1)

不会返回正确区分两种性别的表示,并且:

 #2 plot 
par(mfrow=c(1,2))
breaks <- seq(-20, 200, 10)
nudge <- 1
lwd <- 1.5
x <- matrix(c(breaks-nudge, breaks+nudge), ncol=2)
obs <- airquality[,"Ozone"]
mis  <- imp$imp$Ozone[,1]
fobs <- c(hist(obs, breaks, plot=FALSE)$counts, 0)
fmis <- c(hist(mis, breaks, plot=FALSE)$counts, 0)
y <- matrix(c(fobs, fmis), ncol=2)

tp <- xyplot(imp, Ozone~Solar.R, na.groups=ici(imp),
             ylab="Ozone (ppb)", xlab="Solar Radiation (lang)",
             cex = 0.75, lex=lwd, pch=19,
             ylim = c(-20, 180), xlim = c(0,350))
print(tp)

它为 mouse 包中的空气质量数据集再现了一个很好的散点图。关键是我无法使用 na.locf 函数提取估算值。

我指定我应该将年龄/测量值绘制为响应变量与性别,这就是为什么我需要区分两种性别。

4

1 回答 1

1

我可能有点晚了,但是您可以使用imputeTS CRAN 包的绘图功能来应用不同的插补算法,并将这些算法与观察值一起绘制。

简短的例子:

library("imputeTS")

# Using tsAirgap as example time series

# Last Observation Carried Forward - LOCF
imp_locf <- na_locf(tsAirgap)

# Next Observation Carried Backwards - NOCB
imp_nocb <- na_locf(tsAirgap, option = "nocb")

# Impute with Moving average
imp_ma <- na_ma(tsAirgap)

# Example plot for the na_ma imputations
ggplot_na_imputations(tsAigap, imp_ma)

以下是这些图的样子: 在此处输入图像描述

还有其他可用的缺失数据图和插补方法,如线性插值、样条插值、斯蒂曼插值、季节性调整插补、状态空间模型上的卡尔曼平滑。

于 2021-10-28T13:43:30.607 回答