我正在扩展默认的 summary() 函数,因为我需要更多的百分位数。它似乎适用于一个变量,但如果我添加一个包含多个变量的数据框,我会得到奇怪的值,而使用默认的 summary() 它可以工作。即使我完全复制了默认的汇总函数,所以如果不添加更多的百分位数,它也不起作用。我使用这一行来获取代码:
getS3method('summary','default')
-
summary_adj <- function (object, ..., digits = max(3L, getOption("digits") -
3L))
{
if (is.factor(object))
return(summary.factor(object, ...))
else if (is.matrix(object))
return(summary.matrix(object, digits = digits, ...))
value <- if (is.logical(object))
c(Mode = "logical", {
tb <- table(object, exclude = NULL)
if (!is.null(n <- dimnames(tb)[[1L]]) && any(iN <- is.na(n))) dimnames(tb)[[1L]][iN] <- "NA's"
tb
})
else if (is.numeric(object)) {
nas <- is.na(object)
object <- object[!nas]
qq <- stats::quantile(object)
qq <- signif(c(qq[1L:3L], mean(object), qq[4L:5L]), digits)
names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.",
"Max.")
if (any(nas))
c(qq, `NA's` = sum(nas))
else qq
}
else if (is.recursive(object) && !is.language(object) &&
(n <- length(object))) {
sumry <- array("", c(n, 3L), list(names(object), c("Length",
"Class", "Mode")))
ll <- numeric(n)
for (i in 1L:n) {
ii <- object[[i]]
ll[i] <- length(ii)
cls <- oldClass(ii)
sumry[i, 2L] <- if (length(cls))
cls[1L]
else "-none-"
sumry[i, 3L] <- mode(ii)
}
sumry[, 1L] <- format(as.integer(ll))
sumry
}
else c(Length = length(object), Class = class(object), Mode = mode(object))
class(value) <- c("summaryDefault", "table")
value
}
示例数据集:
nums <- data.frame(var1=rnorm(n=20,mean=5,sd=2),var2=rnorm(n=20,mean=10,sd=4))
-
> summary(nums)
var1 var2
Min. :1.821 Min. : 5.095
1st Qu.:3.705 1st Qu.: 7.827
Median :4.930 Median :10.440
Mean :4.975 Mean :10.176
3rd Qu.:6.553 3rd Qu.:12.247
Max. :7.802 Max. :14.862
> summary_adj(nums)
Length Class Mode
var1 20 -none- numeric
var2 20 -none- numeric
但它适用于 1 个变量:
> summary_adj(nums$var1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.821 3.705 4.930 4.975 6.553 7.802
所以它似乎不适用于具有多个变量的数据框。非常感谢任何帮助!
最好的问候,蒂姆
@ 应要求编辑我添加了用于不同分位数的代码:
summary_adj<-function (object, ..., digits = max(3L, getOption("digits") -
3L))
{
if (is.factor(object))
return(summary.factor(object, ...))
else if (is.matrix(object))
return(summary.matrix(object, digits = digits, ...))
value <- if (is.logical(object))
c(Mode = "logical", {
tb <- table(object, exclude = NULL)
if (!is.null(n <- dimnames(tb)[[1L]]) && any(iN <- is.na(n))) dimnames(tb)[[1L]][iN] <- "NA's"
tb
})
else if (is.numeric(object)) {
nas <- is.na(object)
object <- object[!nas]
#qq <- stats::quantile(object)
qq <- stats::quantile(object,c(.05,.25,.5,.75,.95,1))
qq <- signif(c(qq[1L:3L], mean(object), qq[4L:6L],NROW(object)), digits)
names(qq) <- c("5th Perc", "25th Perc", "Median","Mean", "75th Perc","95th Perc",
"Max.","obs.")
if (any(nas))
c(qq, `NA's` = sum(nas))
else qq
}
else if (is.recursive(object) && !is.language(object) &&
(n <- length(object))) {
sumry <- array("", c(n, 3L), list(names(object), c("Length",
"Class", "Mode")))
ll <- numeric(n)
for (i in 1L:n) {
ii <- object[[i]]
ll[i] <- length(ii)
cls <- oldClass(ii)
sumry[i, 2L] <- if (length(cls))
cls[1L]
else "-none-"
sumry[i, 3L] <- mode(ii)
}
sumry[, 1L] <- format(as.integer(ll))
sumry
}
else c(Length = length(object), Class = class(object), Mode = mode(object))
class(value) <- c("summaryDefault", "table")
value
}
这适用于我的 df 中的一个变量:
summary_adj(nums$var1)
5th Perc 25th Perc Median Mean 75th Perc 95th Perc Max. obs.
1.984 3.705 4.930 4.975 6.553 7.491 7.802 20.000
但并非所有人:
> summary_adj(nums)
Length Class Mode
var1 20 -none- numeric
var2 20 -none- numeric
而它使用正常的摘要:
> summary(nums)
var1 var2
Min. :1.821 Min. : 5.095
1st Qu.:3.705 1st Qu.: 7.827
Median :4.930 Median :10.440
Mean :4.975 Mean :10.176
3rd Qu.:6.553 3rd Qu.:12.247
Max. :7.802 Max. :14.862