您好,我有下面这个 data.frame(数据见文末)。我想添加一列归一化计数:N.norm = N/sum(N)。我以前的 data.frame 没有 Date 列,当时可以使用:
oo[, N.norm := N/sum(N), by=Operator]
我尝试把 Date 也加入 by 参数:
oo[, N.norm := N/sum(N), by=Operator,Date]
但收到错误消息
Error in `[.data.frame`(oo, , `:=`(N.norm, N/sum(N)), by = Operator, Date) :
unused argument(s) (by = Operator)
例如,对于操作员“A”在“2013 年 1 月”这个月,每个 ROI_Score(c("Good","OK","Poor","Crap"))都有一个计数 N。我想对该组合(A 与 2013 年 1 月)的 N 求和,然后将每个计数 N 除以 sum(N)。
另外,有人能推荐一份关于在 R 中操作 data.frame 的入门教程吗?
# Example data; the literal looks like dput() output of a data.frame.
# Shape: 100 rows (row.names = c(NA, 100L)) and four columns:
#   Operator  - factor with levels "A", "D", "J", "L", "M"
#   ROI_Score - factor with levels "Crap", "Good", "OK", "Poor"
#   Date      - character month labels, "Jan 2013" through "May 2013"
#   N         - numeric count for each Operator/ROI_Score/Date row
# The class attribute is plain "data.frame" (not "data.table"), and the value
# is not assigned to a name here.
structure(list(Operator = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("A",
"D", "J", "L", "M"), class = "factor"), ROI_Score = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L), .Label = c("Crap", "Good", "OK", "Poor"), class = "factor"),
Date = c("Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013", "Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013"), N = c(0, 0, 0, 0, 0, 1, 2, 15, 1, 5, 3, 2, 3,
1, 0, 3, 0, 5, 5, 1, 0, 0, 0, 1, 0, 14, 17, 16, 8, 7, 5,
10, 6, 1, 5, 24, 27, 31, 16, 15, 0, 0, 0, 0, 0, 26, 24, 20,
11, 18, 3, 4, 17, 3, 2, 20, 36, 12, 21, 9, 0, 0, 0, 0, 0,
3, 12, 5, 12, 4, 0, 0, 3, 4, 0, 29, 37, 41, 25, 10, 0, 0,
0, 0, 0, 9, 9, 15, 17, 3, 6, 4, 5, 4, 1, 14, 13, 9, 15, 9
)), .Names = c("Operator", "ROI_Score", "Date", "N"), row.names = c(NA,
100L), class = "data.frame")
我不确定数据是 data.frame 还是 data.table 格式。下面是我的代码,改编自 Arun 在另一个问题(“重塑数据框以创建归一化条形图和饼图”)中给出的解决方案:
# Count rows per Operator / ROI_Score / Date in the ROI score export and add
# N.norm, the share of each ROI_Score within its Operator/Date group.

# `:=` and `by` are only understood by `[.data.table`; attach the package so
# the calls below are not dispatched to `[.data.frame`.
library(data.table)

# read.csv() already returns a data.frame, so no data.frame() wrapper is needed.
df <- read.csv("/misc/jaguar_data/report/system/db_fs/roi_scores.csv")

# Get date into nice structure for faceting: parse "%d/%m/%Y" day-level dates
# and relabel them as "Mon YYYY". `format` is spelled out instead of relying
# on partial matching of `f`.
df$Date <- strftime(strptime(df$Date, format = "%d/%m/%Y"), "%b %Y")
dt <- data.table(df)

# Distinct values of each grouping column, used to build the full grid below.
ops <- as.character(unique(dt$Operator))
scr <- as.character(unique(dt$ROI_Score))
dts <- unique(dt$Date)

# Count rows per combination (.N), key the result, then join it against the
# cross product of all operators, scores and dates. Combinations that never
# occur come back with N = NA and are set to 0.
oo <- setkey(
  dt[, .N, by = "Operator,ROI_Score,Date"],
  Operator, ROI_Score, Date
)[CJ(ops, scr, dts)][is.na(N), N := 0L]

# Normalise within each Operator/Date pair, so the ROI_Score shares of one
# operator in one month sum to 1. Several grouping columns must be passed as
# ONE `by` value (a list, `.()`, or a "a,b" string); `by = Operator, Date`
# passes Date as a separate, unused argument.
# NOTE: an Operator/Date pair whose counts are all 0 yields NaN (0 / 0).
oo[, N.norm := N / sum(N), by = list(Operator, Date)]