我有一个数据集，想用它绘制 PCA 图。在该图中，观测值应按 `name` 列分组，同一组使用相同的颜色（`habillage = a$name`）。此外，我希望每个观测点都能标出它所属的 `Age` 组。我发现 `label = "none"` 不会显示标签，但即使改成 `label = a$Age`，图也没有任何变化。最后，在使用 `habillage = a$name` 时，如何避免图例中重复出现黑白文字？
# Attach the packages used by this script.
# plyr is attached BEFORE dplyr on purpose: both export summarise(), arrange(),
# etc., and the package attached last wins, so this order makes the dplyr
# versions the ones called by the pipeline below.
library(plyr)
library(dplyr)
library(factoextra)
# Example data as a dput()-style `structure()` literal: a 40-row grouped tibble
# (class "grouped_df", grouped by effective_status and Age).
# Columns: effective_status, Age, name (factors); n_obs, Clicks, Impressions,
# Reach, Purchase (integer); Spend, PurchaseValue, Date_minus_start_time
# (double).
# NOTE(review): these columns match the output of the group_by()/summarise()
# pipeline further down, so this looks like a dput() of the aggregated table
# `a` rather than of the raw input data -- confirm.
df<-structure(list(effective_status = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("ACTIVE", "PAUSED"), class = "factor"),
Age = structure(c(3L, 8L, 6L, 4L, 4L, 5L, 4L, 2L, 4L, 8L,
2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L
), .Label = c("13-17", "18-24", "25-34", "35-44", "45-54",
"55-64", "65+", "Unknown"), class = "factor"), name = structure(c(19L,
23L, 18L, 22L, 9L, 6L, 6L, 9L, 15L, 14L, 12L, 14L, 12L, 13L,
15L, 10L, 11L, 20L, 9L, 13L, 19L, 6L, 9L, 10L, 13L, 14L,
19L, 20L, 21L, 22L, 6L, 10L, 11L, 13L, 14L, 18L, 23L, 12L,
21L, 22L), .Label = c("Automated Boost", "Competitors January",
"Dynamic Ad", "Focus campaign", "Marketing 0-25", "Marketing April",
"Marketing August", "Marketing December", "Marketing February",
"Marketing January", "Marketing July", "Marketing June",
"Marketing March", "Marketing May", "Upsell April", "Upsell August",
"Upsell Boost", "Upsell February", "Upsell January", "Upsell July",
"Upsell June", "Upsell March", "Upsell May"), class = "factor"),
n_obs = c(1L, 1L, 1L, 3L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), Clicks = c(1364L,
0L, 4919L, 2597L, 2641L, 0L, 915L, 1104L, 63L, 0L, 242L,
206L, 3661L, 11L, 33L, 0L, 246L, 247L, 4L, 0L, 0L, 0L, 0L,0L, 0L, 2009L, 0L, 43L, 166L, 0L, 17L, 0L, 95L, 0L, 137L,
0L, 68L, 0L, 0L, 0L), Impressions = c(12409L, 0L, 58222L,
30115L, 47119L, 0L, 18817L, 17068L, 4175L, 0L, 4528L, 9842L,
98421L, 3L, 6042L, 0L, 7154L, 4253L, 202L, 0L, 0L, 0L, 0L,
150L, 0L, 17117L, 0L, 857L, 1821L, 0L, 1034L, 0L, 1258L,
0L, 948L, 0L, 2972L, 0L, 0L, 0L), Reach = c(12164L, 0L, 46142L,
25282L, 35142L, 0L, 14843L, 13533L, 3624L, 0L, 4528L, 8394L,
58401L, 3L, 5874L, 0L, 7013L, 3586L, 202L, 0L, 0L, 0L, 0L,
150L, 0L, 15349L, 0L, 819L, 1810L, 0L, 1014L, 0L, 938L, 0L,
948L, 0L, 2782L, 0L, 0L, 0L), Spend = c(1153.11, 0, 9663.16,
3202.1, 3393.49, 0, 1739.37, 1344.19, 501.88, 0, 299.22,
565.74, 11228.5, 0.15, 609.05, 0, 709.19, 478.98, 26.12,
0, 0, 0, 0, 22.25, 0, 2485.04, 0, 232.14, 256.1, 0, 129.6,
0, 157.25, 0, 122.62, 0, 717.32, 0, 0, 0.05), Purchase = c(140L,
163L, 104L, 33L, 22L, 17L, 11L, 13L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), PurchaseValue = c(221595.22,
173029.62, 101894.91, 38974.63, 27336.71, 13247.8, 12461.66,
6186.55, 3754.31, 971.11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Date_minus_start_time = c(9, 13, 15, 26.3055555555556, 29,
5.5, 5.5, 19, 17, 16.5, 2, 27, 10, 0, 29, 26.5, 13, 15, 19,
43.9583333333333, 30, 5, 28, 8, 29.9583333333333, 21, 19,
3, 9, 17.5, 28, 10, 14, 30.4791666666667, 0, 11, 15, 18,
21, 5)), row.names = c(NA, -40L), groups = structure(list(
effective_status = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("ACTIVE", "PAUSED"), class = "factor"), Age = structure(2:8, .Label = c("13-17",
"18-24", "25-34", "35-44", "45-54", "55-64", "65+", "Unknown"
), class = "factor"), .rows = structure(list(c(8L, 11L, 12L
), c(1L, 13L, 14L, 15L), c(4L, 5L, 7L, 9L, 16L, 17L, 18L),
c(6L, 19L, 20L, 21L), c(3L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L), 31:37, c(2L, 10L, 38L, 39L, 40L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Build the summary table `a` that feeds the PCA: one row per
# (effective_status, Age, name) combination, sorted by PurchaseValue
# in descending order.
#
# NOTE(review): `helmes` is not defined anywhere in the visible script. The
# `df` literal above already has exactly the columns this pipeline produces,
# so it looks like a dput() of `a` itself -- confirm which object is meant to
# be the raw input here.
#
# The sample size is capped at nrow(helmes) so that sample() does not fail
# with "cannot take a sample larger than the population" on small inputs.
a <- helmes[sample(nrow(helmes), min(100L, nrow(helmes))), ] %>%
  # Leave out the campaigns that should not enter the analysis.
  filter(!(name %in% c("Upsell Boost", "Marketing 0-25", "Dynamic Ad"))) %>%
  group_by(effective_status, Age, name) %>%
  summarise(
    n_obs = n(),
    Clicks = sum(Clicks, na.rm = TRUE),
    Impressions = sum(Impressions, na.rm = TRUE),
    Reach = sum(Reach, na.rm = TRUE),
    Spend = sum(Spend, na.rm = TRUE),
    Purchase = sum(Purchase, na.rm = TRUE),
    PurchaseValue = sum(PurchaseValue, na.rm = TRUE),
    Date_minus_start_time = mean(Date_minus_start_time, na.rm = TRUE),
    # Without this the result stays grouped by effective_status and Age,
    # which silently affects later dplyr verbs applied to `a`.
    .groups = "drop"
  ) %>%
  arrange(desc(PurchaseValue))
# Run the PCA on the summary measures only: columns 1-3 of `a` are the
# grouping identifiers (effective_status, Age, name), so the numeric columns
# start at position 4.
# The argument is spelled `scale.` in full because that is prcomp()'s actual
# parameter name; `scale = TRUE` only worked through partial argument matching.
res.pca <- prcomp(a[4:ncol(a)], scale. = TRUE)
# Individuals plot of the PCA result. Observations are grouped by campaign
# name via `habillage`, text labels are switched off, and a confidence
# ellipse is requested per group.
# Options that were tried and left disabled: col.ind = a$name,
# geom = c("point", "text"), palette = c("#00AFBB", "#FC4E07", "#2CA25F").
fviz_pca_ind(
  res.pca,
  habillage = a$name,           # grouping variable (campaign name)
  addEllipses = TRUE,           # draw one ellipse per group
  ellipse.type = "confidence",
  label = "none",               # no text labels on the points
  repel = TRUE,
  legend.title = "Groups"
)