我正在使用钻石数据集。
> dput(diamonds_2[1:100,])
structure(list(carat = structure(c(4L, 2L, 4L, 10L, 12L, 5L,
5L, 7L, 3L, 4L, 11L, 4L, 3L, 12L, 1L, 13L, 11L, 11L, 11L, 11L,
11L, 4L, 4L, 12L, 12L, 4L, 5L, 11L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 12L, 7L, 14L, 14L, 14L, 7L, 7L, 13L, 10L, 13L, 13L, 6L,
10L, 5L, 4L, 13L, 3L, 3L, 11L, 11L, 11L, 11L, 11L, 16L, 11L,
11L, 11L, 23L, 9L, 13L, 12L, 12L, 5L, 5L, 11L, 11L, 11L, 11L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 19L, 7L, 5L, 5L, 5L, 5L, 13L,
45L, 61L, 45L, 46L, 53L, 45L, 45L, 71L, 48L, 55L), .Label = c("0.2",
"0.21", "0.22", "0.23", "0.24", "0.25", "0.26", "0.27", "0.28",
"0.29", "0.3", "0.31", "0.32", "0.33", "0.34", "0.35", "0.36",
"0.37", "0.38", "0.39", "0.4", "0.41", "0.42", "0.43", "0.5",
"0.51", "0.52", "0.53", "0.54", "0.55", "0.56", "0.57", "0.58",
"0.59", "0.6", "0.61", "0.62", "0.63", "0.64", "0.65", "0.66",
"0.67", "0.68", "0.69", "0.7", "0.71", "0.72", "0.73", "0.74",
"0.75", "0.76", "0.77", "0.78", "0.79", "0.8", "0.81", "0.82",
"0.83", "0.84", "0.85", "0.86", "0.87", "0.88", "0.89", "0.9",
"0.91", "0.92", "0.93", "0.94", "0.95", "0.96", "0.97", "0.98",
"0.99", "1", "1.01", "1.02", "1.03", "1.04", "1.05", "1.06",
"1.07", "1.08", "1.09", "1.1", "1.11", "1.12", "1.13", "1.14",
"1.15", "1.16", "1.17", "1.18", "1.19", "1.2", "1.21", "1.22",
"1.23", "1.24", "1.25", "1.27", "1.28", "1.29", "1.31", "1.5",
"1.51", "1.52"), class = "factor"), color = structure(c(2L, 2L,
2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L, 7L,
7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L, 2L,
1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L, 5L,
3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 4L,
6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L, 3L,
2L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"),
clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 3L, 4L,
5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 3L,
3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L,
2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L,
7L, 7L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L,
7L, 7L, 7L, 7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L,
3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
price = c(481, 481, 492, 558, 568, 579, 579, 590, 590, 601,
610, 621, 642, 660, 671, 671, 700, 729, 729, 729, 729, 740,
750, 750, 750, 761, 772, 793, 793, 793, 951, 951, 951, 951,
951, 951, 951, 951, 952, 952, 952, 952, 952, 952, 952, 952,
952, 952, 953, 953, 953, 953, 953, 953, 953, 954, 954, 954,
954, 954, 958, 958, 958, 958, 958, 959, 959, 959, 959, 959,
959, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960,
960, 960, 960, 960, 960, 960, 960, 960, 1, 1, 1, 2, 2, 2,
2, 2, 3, 3), cut_new = structure(c(1L, 1L, 2L, 1L, 2L, 3L,
3L, 3L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 3L, 2L,
2L, 2L, 3L, 1L), .Label = c("Above average", "Below average",
"Very Good"), class = "factor")), .Names = c("carat", "color",
"clarity", "price", "cut_new"), row.names = c(NA, 100L), class = "data.frame")
现在我想为每颗钻石添加一列平均价格:也就是在每一行的价格旁边,显示该钻石所属分组(按 color 和 cut_new 的组合分组)的平均价格。
我尝试了以下两种写法,但都没有得到正确的结果:
尝试1:
# Attempt 1: compute mean prices with tapply() and merge them back.
# Preview the first rows of the data.
head(diamonds_2)
# Chained assignment: both diamonds_2 and x receive the data frame.
# x is not used again in the code shown here.
diamonds_2 <- x <- as.data.frame(diamonds_2)
# Coerce price to numeric (in the dput shown above it is already numeric).
diamonds_2$price <- as.numeric(diamonds_2$price)
# tapply() with a list of two factors returns a color x cut_new matrix of
# mean prices, not a data frame with key columns.
# NOTE(review): the result is stored under the name `mean`, shadowing the
# name of the base function passed as FUN; a different name would be clearer.
# `T` is reassignable, so `TRUE` is the safer spelling.
mean <- tapply(diamonds_2$price, list(diamonds_2$color, diamonds_2$cut_new), mean, na.rm = T)
# NOTE(review): this merge looks like the failing step. `mean` is a matrix
# whose columns are the cut_new levels, so it has no "color" column for by.y.
# A per-row group mean that needs no merge would presumably be
#   ave(diamonds_2$price, diamonds_2$color, diamonds_2$cut_new, FUN = mean)
# assuming `mean` still refers to the base function -- TODO confirm.
combine <- merge (diamonds_2, mean, by.x = "cut_new", by.y= "color")
尝试2:
# Attempt 2: summarise price per group with summaryBy().
# NOTE(review): summaryBy() is not base R -- presumably doBy::summaryBy, which
# would need library(doBy) first; confirm.
# The formula price ~ color groups by color only; cut_new is not on the
# right-hand side (presumably price ~ color + cut_new was intended), and
# `results` is not joined back onto the rows of diamonds_2 in the code shown.
results <- summaryBy(price~color, data= diamonds_2, FUN = mean)
有什么办法能让其中一种写法正常工作吗?
谢谢