3

我对 R 和统计学都非常陌生,在向 PCA 图添加多个置信椭圆时遇到了麻烦。

我想用 95% 的置信椭圆突出显示 PCA 图中潜在的分组/集群。我尝试在 R 中使用 dataEllipse 函数,但无法弄清楚如何把中心各不相同的多个椭圆添加到同一张 PCA 图上(每个椭圆的中心应位于看起来构成一个集群的那些点上;在本例中,一个集群即某个石料来源地以及可能出自该来源地的石器)。

感谢您提供的任何帮助!

# Work on a copy of the imported data sheet and display it.
lithic_final <- LITHIC.DATASHEET.FOR.R.COMPLETE.FORMAT
lithic_final

# Principal component analysis of all columns; display the fit.
pca1 <- princomp(lithic_final)
pca1

# Source label for every sample, in row order (one line per source).
lithic_source <- c(
  "A1", "A1", "A1", "A1",
  "A2", "A2", "A2",
  "A3", "A3", "A3",
  "B", "B", "B", "B", "B", "B",
  "C", "C", "C", "C", "C", "C", "C",
  "D", "D", "D", "D", "D", "D", "D", "D",
  "E", "E", "E", "E", "E", "E", "E", "E",
  "F", "F",
  "G", "G", "G", "G",
  "H", "H", "H", "H", "H", "H", "H",
  "I1", "I1", "I1",
  "I2", "I2", "I2", "I2", "I2",
  "J1", "J1",
  "J2", "J2", "J2", "J2", "J2", "J2", "J2", "J2", "J2",
  "K", "K", "K", "K", "K", "K", "K",
  "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
  "BB1", "BB1", "BB1",
  "FC", "FC", "FC",
  "JRPP", "JRPP", "JRPP",
  "BB2", "BB2", "BB2", "BB2",
  "MWP", "MWP", "MWP", "MWP",
  "RPO", "RPO", "RPO"
)

lithic_source

# Importance of the components, then the default plot of the princomp fit.
summary(pca1)

plot(pca1)

# Scores on components 1 and 2, labelled with the lithic source.
# Keep a copy rounded to two decimals and display it.
pca_scores <- round(pca1$scores[, 1:2], digits = 2)
pca_scores
# Open an empty frame (type = "n"), then draw every sample as its
# abbreviated source label instead of a point symbol.
plot(pca1$scores[, 1], pca1$scores[, 2], type = "n")
text(
  pca1$scores[, 1], pca1$scores[, 2],
  labels = abbreviate(lithic_source, minlength = 3),
  cex = 0.45
)



# Scores of each sample on components 2 and 3, labelled with the lithic source.
# Keep a copy rounded to two decimals and display it.
pca2_3_scores <- round(pca1$scores[, 2:3], digits = 2)
pca2_3_scores
# Empty frame first, then one abbreviated source label per sample.
plot(pca1$scores[, 2], pca1$scores[, 3], type = "n")
text(
  pca1$scores[, 2], pca1$scores[, 3],
  labels = abbreviate(lithic_source, minlength = 3),
  cex = 0.45
)

# Scores of each sample on components 3 and 4, labelled with the lithic source.
# Keep a copy rounded to two decimals and display it.
pca3_4_scores <- round(pca1$scores[, 3:4], digits = 2)
pca3_4_scores
# Empty frame first, then one abbreviated source label per sample.
plot(pca1$scores[, 3], pca1$scores[, 4], type = "n")
text(
  pca1$scores[, 3], pca1$scores[, 4],
  labels = abbreviate(lithic_source, minlength = 3),
  cex = 0.45
)

# Scores of each sample on components 1 and 3, labelled with the lithic source.
# Select exactly columns 1 and 3: the range `1:3` would also include
# component 2, which is not part of this view.
pca1_3_scores <- round(pca1$scores[, c(1, 3)], 2)
pca1_3_scores
# Empty frame first, then one abbreviated source label per sample.
plot(pca1$scores[, 1], pca1$scores[, 3], type = "n")
text(
  pca1$scores[, 1], pca1$scores[, 3],
  labels = abbreviate(lithic_source, minlength = 3),
  cex = 0.45
)

# Scores of each sample on components 1 and 4, labelled with the lithic source.
# Select exactly columns 1 and 4: the range `1:4` would also include
# components 2 and 3, which are not part of this view.
pca1_4_scores <- round(pca1$scores[, c(1, 4)], 2)
pca1_4_scores
# Empty frame first, then one abbreviated source label per sample.
plot(pca1$scores[, 1], pca1$scores[, 4], type = "n")
text(
  pca1$scores[, 1], pca1$scores[, 4],
  labels = abbreviate(lithic_source, minlength = 3),
  cex = 0.45
)

# Add one data ellipse per lithic source to the PC1 vs PC4 score plot that is
# currently open. dataEllipse() comes from the car package.
# Each source gets its own call so that every ellipse is centred on, and
# shaped by, that source's samples only; a single call over all scores would
# draw one pooled ellipse.
# NOTE(review): the level is kept at 0.9 as in the original call; use
# levels = 0.95 if 95% ellipses are wanted.
stopifnot(length(lithic_source) == nrow(pca1$scores))
for (src in unique(lithic_source)) {
  in_group <- lithic_source == src
  # With fewer than three samples the 2-D covariance is undefined or
  # singular, so no ellipse is drawn for that source.
  if (sum(in_group) >= 3) {
    car::dataEllipse(
      pca1$scores[in_group, 1], pca1$scores[in_group, 4],
      levels = 0.9, add = TRUE, plot.points = FALSE
    )
  }
}


# Sample data in dput()-style form: a data.frame with 10 rows and 13 integer
# columns (Ca, Cr, Cu, Fe, K, Mn, Nb, Rb, Sr, Ti, Y, Zn, Zr, each suffixed
# ".K12"). Presumably an excerpt of the lithic data sheet analysed above --
# TODO confirm. The expression is not assigned to a name here.
structure(list(Ca.K12 = c(418L, 392L, 341L, 251L, 297L, 238L, 
258L, 5L, 2L, 37L), Cr.K12 = c(1L, 12L, 15L, 6L, 9L, 6L, 35L, 
7L, 45L, 32L), Cu.K12 = c(89L, 96L, 81L, 63L, 88L, 103L, 104L, 
118L, 121L, 90L), Fe.K12 = c(18627L, 18849L, 18413L, 12893L, 
17757L, 17270L, 16198L, 2750L, 4026L, 3373L), K.K12 = c(20L, 
23L, 28L, 0L, 34L, 17L, 45L, 102L, 150L, 147L), Mn.K12 = c(205L, 
212L, 235L, 120L, 216L, 212L, 246L, 121L, 155L, 115L), Nb.K12 = c(139L, 
119L, 154L, 91L, 122L, 137L, 137L, 428L, 414L, 428L), Rb.K12 = c(99L, 
42L, 79L, 49L, 210L, 243L, 168L, 689L, 767L, 705L), Sr.K12 = c(3509L, 
3766L, 3481L, 2715L, 2851L, 2668L, 2695L, 202L, 220L, 217L), 
    Ti.K12 = c(444L, 520L, 431L, 293L, 542L, 622L, 531L, 82L, 
    129L, 84L), Y.K12 = c(135L, 121L, 105L, 74L, 144L, 79L, 85L, 
    301L, 326L, 379L), Zn.K12 = c(131L, 133L, 108L, 78L, 124L, 
    111L, 114L, 81L, 78L, 59L), Zr.K12 = c(1348L, 1479L, 1333L, 
    964L, 1506L, 1257L, 1296L, 3967L, 4697L, 4427L)), .Names = c("Ca.K12", 
"Cr.K12", "Cu.K12", "Fe.K12", "K.K12", "Mn.K12", "Nb.K12", "Rb.K12", 
"Sr.K12", "Ti.K12", "Y.K12", "Zn.K12", "Zr.K12"), row.names = c(NA, 
10L), class = "data.frame")
4

2 回答 2

6

如果您只关注问题本身,而不是附带一堆无关的内容,我想您会更快得到回复。您给出了绘制多组主成分图的命令,但它们与您的问题无关。真正的问题是:如何按组绘制椭圆?您提供的 10 行、3 个分组的样本数据也帮不上忙,因为 3 个点不足以绘制数据椭圆。您使用的是 car 包中的 dataEllipse 函数,而这个包恰好为您的问题提供了最简单的解答:

首先,一个可重现的例子:

# Reproducible toy data: three sources of 25 samples each, drawn from normal
# distributions (sd 5) around different (X1, X2) centres.
set.seed(42) # fixed seed so everyone gets the same numbers
source_a <- data.frame(
  X1 = rnorm(n = 25, mean = 50, sd = 5),
  X2 = rnorm(n = 25, mean = 40, sd = 5)
)
source_b <- data.frame(
  X1 = rnorm(n = 25, mean = 20, sd = 5),
  X2 = rnorm(n = 25, mean = 40, sd = 5)
)
source_c <- data.frame(
  X1 = rnorm(n = 25, mean = 35, sd = 5),
  X2 = rnorm(n = 25, mean = 25, sd = 5)
)
# Stack the three sources and build the matching vector of source labels.
lithic_dat <- rbind(source_a, source_b, source_c)
lithic_source <- rep(c("a", "b", "c"), each = 25)

用 scatterplot() 绘制椭圆并添加文本:

# Grouped scatterplot with one 90% data ellipse per source (car package),
# with the points replaced by text labels.
library(car)
# car >= 3.0 argument names: `regLine` switches off the fitted line and the
# ellipse levels are passed inside the `ellipse` list. The older spellings
# (`reg.line`, top-level `levels`) are no longer formal arguments.
# pch = "" suppresses the point symbols so only the labels below are shown.
scatterplot(X2 ~ X1 | lithic_source, data = lithic_dat, pch = "",
  smooth = FALSE, regLine = FALSE, ellipse = list(levels = 0.9))
text(lithic_dat$X1, lithic_dat$X2, lithic_source, cex = 0.75)

scatterplot() 可以通过调整参数来满足各种需求,不过也可以不借助它直接绘制椭圆:

# Draw the labelled scatter and one 90% data ellipse per source by hand,
# without scatterplot().
sources <- unique(lithic_source) # vector of the different sources
# Empty frame with X1 on the x axis and X2 on the y axis, so the axis ranges
# match the labels and ellipses drawn below.
plot(lithic_dat$X1, lithic_dat$X2, type = "n")
text(lithic_dat$X1, lithic_dat$X2, lithic_source, cex = 0.75)
for (src in sources) {
  in_group <- lithic_source == src
  # add = TRUE draws onto the existing frame; plot.points = FALSE keeps the
  # text labels as the only markers.
  car::dataEllipse(
    lithic_dat$X1[in_group], lithic_dat$X2[in_group],
    levels = 0.9, add = TRUE, plot.points = FALSE
  )
}

这同样适用于您的主成分得分以及任何其他数据。

于 2012-08-23T16:50:47.220 回答
3

这是一个简单的解决方案,使用名为 ggbiplot 的包(可从 GitHub 获取)和 iris 数据集。希望这正是您想要的。

# PCA biplot of the four iris measurements, grouped by species, with a 95%
# ellipse per group (ggbiplot package).
# Install ggbiplot from GitHub only when it is missing, instead of
# reinstalling it on every run.
if (!requireNamespace("ggbiplot", quietly = TRUE)) {
  devtools::install_github("vqv/ggbiplot")
}
library(ggbiplot)
pca <- prcomp(iris[, 1:4])
ggbiplot(pca, groups = iris$Species, ellipse = TRUE, ellipse.prob = 0.95)
于 2016-05-24T03:37:32.923 回答