我想将以下示例数据汇总到一个新的数据框中,如下所示:
总体 (Population)、样本量 (N)、完成百分比 (%)
样本量是每个总体的全部记录数,我可以用 table 命令或 tapply 来得到。完成百分比是带有“结束日期”(End_Date)的记录所占的百分比(所有没有“结束日期”的记录都假定为未完成)。这正是我卡住的地方!
样本数据
# Example data: 35 records, one row per record, with the population (site)
# each record belongs to, its start date and its end date.
# End_Date is NA for six records (rows 1, 3, 4, 23, 28 and 29).
# NOTE(review): the name `sample` masks base::sample() while this object is
# in scope; it is kept because the surrounding text refers to it by that name.
sample <- local({
  # Factor codes written as runs: each code is repeated `times` in a row.
  population_codes <- rep(
    c(1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 1L, 3L),
    times = c(5L, 3L, 1L, 2L, 6L, 6L, 2L, 4L, 2L, 4L)
  )
  population <- structure(
    population_codes,
    levels = c("Glommen", "Kaseberga", "Steninge"),
    class = "factor"
  )

  # Dates are stored as day counts since 1970-01-01 (class "Date").
  start_days <- rep(
    c(16032, 16036, 16037, 16038, 16039, 16041, 16044, 16045, 16048),
    times = c(5L, 3L, 1L, 2L, 6L, 6L, 2L, 4L, 6L)
  )
  start_date <- structure(start_days, class = "Date")

  # End dates have no run structure, so they are listed row by row.
  end_days <- c(
    NA, 16037, NA, NA, 16036,
    16043, 16040, 16041, 16042, 16042,
    16042, 16043, 16043, 16043, 16043,
    16043, 16043, 16045, 16045, 16045,
    16045, 16045, NA, 16048, 16048,
    16049, 16049, NA, NA, 16052,
    16052, 16052, 16052, 16052, 16052
  )
  end_date <- structure(end_days, class = "Date")

  # Assemble the data frame with the same compact row-name encoding as the
  # original dput() output.
  structure(
    list(
      Population = population,
      Start_Date = start_date,
      End_Date = end_date
    ),
    row.names = c(NA, 35L),
    class = "data.frame"
  )
})