这是另一个基本 R 选项:
tab <- table(data.frame(as.vector(row(df[,-1L])), unlist(df[,-1L])))
colnames(tab)[max.col(tab, "first")]
或另一种data.table
方法:
melt(as.data.table(df), id.vars="id")[
order(id, value), ri := rowid(rleid(value))][,
value[which.max(ri)], id]$V1
计时码:
library(data.table)
set.seed(0L)
nr <- 1e5L
nc <- 4L
DF <- data.frame(id=1L:nr, as.data.frame(matrix(sample(letters, nr*nc, TRUE), ncol=nc)))
DT <- as.data.table(DF)
mtd0 <- function(df) apply(df,1,function(x) names(which.max(table(x))))
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
mtd_dt <- function(dt) melt(dt, id.vars="id")[, Mode(value), id]$V1
mtd_dt2 <- function(dt) melt(dt, id.vars="id")[
order(id, value), ri := rowid(rleid(value))][,
value[which.max(ri)], id]$V1
mtd2 <- function(df) {
tab <- table(data.frame(as.vector(row(df[,-1L])), unlist(df[,-1L])))
colnames(tab)[max.col(tab, "first")]
}
df = data.frame(id = 1:3,
var1 = c("red","yellow","green"),
var2 = c("red","yellow","green"),
var3 = c("yellow","orange","green"),
var4 = c("orange","green","yellow"))
a0 <- mtd0(df)
identical(a0, mtd_dt(as.data.table(df)))
#[1] TRUE
identical(a0, mtd2(df))
#[1] TRUE
identical(a0, mtd_dt2(as.data.table(df)))
#[1] TRUE
microbenchmark::microbenchmark(times=1L, mtd0(DF), mtd_dt(DT), mtd_dt2(DT), mtd2(DF))
时间:
Unit: milliseconds
expr min lq mean median uq max neval
mtd0(DF) 10083.9941 10083.9941 10083.9941 10083.9941 10083.9941 10083.9941 1
mtd_dt(DT) 1056.2319 1056.2319 1056.2319 1056.2319 1056.2319 1056.2319 1
mtd_dt2(DT) 168.6183 168.6183 168.6183 168.6183 168.6183 168.6183 1
mtd2(DF) 519.2030 519.2030 519.2030 519.2030 519.2030 519.2030 1