我正在尝试编写一个计算“粘性”（stickiness，一种衡量用户参与度的业务分析指标）的函数，但该函数返回的数据框中填充的是意料之外的数据。
# Builds a data frame with one row per date (starting at the 28th distinct
# `dt` value) holding DAU, a 28-date MAU and their ratio ("Stickiness").
#
# tdata: data frame with columns `dt` (date, parsed below as "%Y-%m-%d") and
#        `username`.
# Returns: data frame with columns Date, DAU, MAU, Stickiness.
#
# NOTE(review): as written, DAU is always 1 and MAU is at most 28 -- see the
# comments inside the loop for the cause.
stickiness <- function(tdata) {
require(plyr)
# One list element per distinct `dt`; each element is the plain vector of
# unique usernames seen on that date, and the element names are the dates.
mau_unique <- dlply(.data = tdata,
.variables = "dt",
.fun = function(x){unique(x$username)})
dates_char <- names(mau_unique)
# Output starts at the 28th date so that each row has 28 dates of history.
# Assumes at least 28 distinct dates; with fewer, 28:length(...) counts down.
dates_vector <- as.Date(dates_char[28:(length(dates_char))],
format = "%Y-%m-%d")
# Pre-allocate the result: the date column plus three zero-filled columns.
output_df <- data.frame(dates_vector,
matrix(data = 0,
nrow = length(dates_char) - 27,
ncol = 3))
colnames(output_df) <- c("Date", "DAU", "MAU", "Stickiness")
for (i in 1:length(dates_vector)) {
dt <- dates_vector[i]
# `[2]` keeps only the second username of that day's vector, so the length
# taken here is always 1 rather than the number of unique users.
output_df[i, "DAU"] <- length(unlist(mau_unique[[as.character(dt)]][2]))
# `"[[", 2` likewise extracts just the second username from each of the 28
# days in the window, so set28 can never hold more than 28 values.
set28 <- unique(unlist(lapply(X = mau_unique[i:(i + 27)], FUN = "[[", 2)))
output_df[i, "MAU"] <- length(set28)
output_df[i, "Stickiness"] <- output_df[i, "DAU"] / output_df[i, "MAU"]
}
return(output_df)
}
返回以下内容:
Date DAU MAU Stickiness
1 2012-04-28 1 28 0.03571429
2 2012-04-29 1 28 0.03571429
3 2012-04-30 1 28 0.03571429
4 2012-05-01 1 28 0.03571429
5 2012-05-02 1 28 0.03571429
6 2012-05-03 1 28 0.03571429
7 2012-05-04 1 28 0.03571429
8 2012-05-05 1 28 0.03571429
9 2012-05-06 1 28 0.03571429
10 2012-05-07 1 28 0.03571429
我期望得到的结果如下:
Date DAU MAU Stickiness
1 2012-04-28 25000 250000 0.10000000
... ... ... ... ...
10 2012-05-07 27371 284114 0.09633809
我怀疑这个问题与我正在评估的环境有关。
更新的样本数据:
> tdata
dt username
4236 2012-04-06 241343664
3091 2012-04-06 306001012
2936 2012-04-06 388682041
5790 2012-04-05 235612064
6763 2012-04-05 69650072
3392 2012-04-06 617142
7684 2012-04-05 189752749
3904 2012-04-06 255852653
7915 2012-04-05 182713266
6107 2012-04-05 187675644
更新：可正常工作的函数（根据 Brian Diggs 的回答修改）:
#' Compute daily "stickiness" (DAU / MAU) over a trailing window of dates.
#'
#' For every date from the `window`-th distinct date onward, counts the unique
#' users active on that date (DAU), the unique users active on that date or
#' any of the `window - 1` preceding distinct dates (MAU), and their ratio.
#' The window counts distinct dates present in `tdata`, not calendar days.
#'
#' @param tdata A data frame with one row per activity record and at least
#'   the columns `dt` (date, as "%Y-%m-%d" text or a Date) and `username`.
#' @param window Number of consecutive distinct dates in the MAU window.
#'   Defaults to 28.
#' @return A data frame with columns `Date`, `DAU`, `MAU` and `Stickiness`,
#'   one row per reported date; zero rows when `tdata` contains fewer than
#'   `window` distinct dates.
stickiness <- function(tdata, window = 28L) {
  stopifnot(
    is.data.frame(tdata),
    all(c("dt", "username") %in% names(tdata)),
    is.numeric(window), length(window) == 1L, !is.na(window), window >= 1
  )
  window <- as.integer(window)

  # Unique users per date. split() orders the groups by sorted date label,
  # which is chronological for ISO-formatted ("%Y-%m-%d") dates.
  users_by_day <- lapply(
    split(tdata$username, as.character(tdata$dt), drop = TRUE),
    unique
  )

  n_days <- length(users_by_day)
  # Number of complete windows; zero (not negative) when data is too short.
  n_out <- max(n_days - window + 1L, 0L)
  # Position of the last (i.e. reported) day of each window.
  last_idx <- seq_len(n_out) + (window - 1L)

  daily_counts <- vapply(users_by_day, length, integer(1), USE.NAMES = FALSE)
  dau <- daily_counts[last_idx]

  # unlist() (rather than c()) pools the whole per-day vectors and also keeps
  # factor usernames intact on every R version.
  mau <- vapply(
    seq_len(n_out),
    function(i) {
      pooled <- unlist(users_by_day[i:(i + window - 1L)], use.names = FALSE)
      length(unique(pooled))
    },
    integer(1)
  )

  data.frame(
    Date = as.Date(as.character(names(users_by_day)[last_idx]),
                   format = "%Y-%m-%d"),
    DAU = as.numeric(dau),
    MAU = as.numeric(mau),
    Stickiness = dau / mau
  )
}