我正在尝试使用 rgl 库可视化一个数据集:该数据集的维度为 15927 x 6,包含经 PCA 变换后的得分。抱歉,我不知道如何在这里展示该数据集(这是我的第一个问题,请见谅)。我使用 plot3d 绘制了前三个向量,但它只显示了非常小的一部分点(少于 50 个)。我尝试更改裁剪范围,但无论增大还是减小界限,它要么什么都不显示,要么只显示一个点。我只在这个特定的数据集上遇到这个问题:在同一个脚本的另一个绘图中使用相同的函数时没有问题。plot_ly 也有同样的问题。我不明白是什么原因造成的,任何帮助都将不胜感激,谢谢。
这是我的代码:
# Object scores: new coordinates of each observation on the principal
# components, as a data frame for plotly.
new.tw <- as.data.frame(fit.data$objectscores)
# Name the six components so the plot_ly formulas below can refer to them.
colnames(new.tw) <- c(
  "Racial", "Activism", "Partizan", "Political", "Civil", "Party"
)

# rgl scatter of components 4-6; only a subset of the original matrix
# appears in the output.
plot3d(fit.data$objectscores[, 4:6],
       xlab = "Political", ylab = "Civil", zlab = "Party")

# Same three components with plotly. Every step pipes from `fig1` (the
# object created here), and the mapped columns match the axis titles.
fig1 <- plot_ly(new.tw, x = ~Political, y = ~Civil, z = ~Party)
fig1 <- fig1 %>% add_markers()
fig1 <- fig1 %>% layout(scene = list(
  xaxis = list(title = "Political"),
  yaxis = list(title = "Civil"),
  zaxis = list(title = "Party")
))
fig1 # same problem here
然而,第二个调用运行得很好。sim.results 是另一个数据集,维度为 1000 x 6。我也尝试过 100000 个观测值,仍然可以正常工作。
# 3D scatter of columns 2-4 of sim.results, coloured by column 1.
plot3d(
  sim.results[, 2:4],
  col = sim.results[, 1],
  xlab = "Racial",
  ylab = "Activism",
  zlab = "Partizan"
)
提前谢谢您,如果我需要提供更多信息,请告诉我。
编辑:根据要求,这是前 20 条记录
# Dump the first 20 rows of the object scores in a reproducible form.
dput(head(fit.data$objectscores, 20))
structure(c(1.50298476621238, 1.50298476621238, 1.50298476621238,
-1.58568117857448, -0.816526654373636, 0.565715252062814, 1.50298476621238,
0.938169846654393, -0.264438304359833, -0.89895908781598, -1.58568117857448,
0.854894719853944, -0.816526654373636, -1.58568117857448, -0.816526654373636,
-0.264438304359833, 1.50298476621238, 1.50298476621238, -0.816526654373636,
-0.816526654373636, -0.22195707370619, -0.22195707370619, -0.22195707370619,
2.92311622515375, -0.9872594008058, 0.155224901527586, -0.22195707370619,
0.507704817966882, -0.0218926196634168, 3.56657236205749, 2.92311622515375,
0.57259387796395, -0.9872594008058, 2.92311622515375, -0.9872594008058,
-0.0218926196634168, -0.22195707370619, -0.22195707370619, -0.9872594008058,
-0.9872594008058, -0.61967189717146, -0.61967189717146, -0.61967189717146,
-0.0641863006278279, 0.751877614031531, 0.467590743969311, -0.61967189717146,
-2.85074009594914, -0.215480514804859, 0.978902546006621, -0.0641863006278279,
0.7286746842318, 0.751877614031531, -0.0641863006278279, 0.751877614031531,
-0.215480514804859, -0.61967189717146, -0.61967189717146, 0.751877614031531,
0.751877614031531, -1.15671269188201, -1.15671269188201, -1.15671269188201,
-1.02621692332093, -0.192044576245641, -1.63097963620485, -1.15671269188201,
2.31326205223891, 0.181116196817509, 0.136775914244596, -1.02621692332093,
1.01664957434761, -0.192044576245641, -1.02621692332093, -0.192044576245641,
0.181116196817509, -1.15671269188201, -1.15671269188201, -0.192044576245641,
-0.192044576245641, -0.716947334740032, -0.716947334740032, -0.716947334740032,
-0.211774508543306, 0.412900956387458, 1.50372485320379, -0.716947334740032,
3.97207352078232, -0.156636810319296, 0.31991195471995, -0.211774508543306,
0.237902829961732, 0.412900956387458, -0.211774508543306, 0.412900956387458,
-0.156636810319296, -0.716947334740032, -0.716947334740032, 0.412900956387458,
0.412900956387458, 0.777882430991955, 0.777882430991955, 0.777882430991955,
0.622604024952591, 0.669399975560595, -2.34610692261696, 0.777882430991955,
1.50641686438724, -1.35856726916514, 1.17683296363845, 0.622604024952591,
-0.281401001859944, 0.669399975560595, 0.622604024952591, 0.669399975560595,
-1.35856726916514, 0.777882430991955, 0.777882430991955, 0.669399975560595,
0.669399975560595), .Dim = c(20L, 6L), .Dimnames = list(c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20"), c("D1", "D2", "D3",
"D4", "D5", "D6")))
与
head(fit.data$objectscores, 20)
D1 D2 D3 D4 D5 D6
1 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
2 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
3 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
4 -1.5856812 2.92311623 -0.0641863 -1.0262169 -0.2117745 0.6226040
5 -0.8165267 -0.98725940 0.7518776 -0.1920446 0.4129010 0.6694000
6 0.5657153 0.15522490 0.4675907 -1.6309796 1.5037249 -2.3461069
7 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
8 0.9381698 0.50770482 -2.8507401 2.3132621 3.9720735 1.5064169
9 -0.2644383 -0.02189262 -0.2154805 0.1811162 -0.1566368 -1.3585673
10 -0.8989591 3.56657236 0.9789025 0.1367759 0.3199120 1.1768330
11 -1.5856812 2.92311623 -0.0641863 -1.0262169 -0.2117745 0.6226040
12 0.8548947 0.57259388 0.7286747 1.0166496 0.2379028 -0.2814010
13 -0.8165267 -0.98725940 0.7518776 -0.1920446 0.4129010 0.6694000
14 -1.5856812 2.92311623 -0.0641863 -1.0262169 -0.2117745 0.6226040
15 -0.8165267 -0.98725940 0.7518776 -0.1920446 0.4129010 0.6694000
16 -0.2644383 -0.02189262 -0.2154805 0.1811162 -0.1566368 -1.3585673
17 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
18 1.5029848 -0.22195707 -0.6196719 -1.1567127 -0.7169473 0.7778824
19 -0.8165267 -0.98725940 0.7518776 -0.1920446 0.4129010 0.6694000
20 -0.8165267 -0.98725940 0.7518776 -0.1920446 0.4129010 0.6694000
这是 str() 的结果:
str(fit.data$objectscores)
num [1:15927, 1:6] 1.503 1.503 1.503 -1.586 -0.817 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:15927] "1" "2" "3" "4" ...
..$ : chr [1:6] "D1" "D2" "D3" "D4" ...
我之前没有提到(抱歉),但我也想到了同样的问题,并尝试运行
unique(fit.data$objectscores[,1])
对所有列都运行了一遍,每列得到 90-100 个不同的值。另外,感谢您的回复。
编辑 2:是的,我后来添加了列的名称:
colnames(new.tw) <-c("Racial","Activism","Partizan","Political","Civil","Party")
我使用 Gifi 库进行非线性 PCA(我从一个序数变量矩阵开始)。这是我(几乎)完整的代码(不包括创建原始数据集的部分,以及之后使用 mclust 库应用一些聚类函数的部分)
library(Gifi)
library(rgl)
library(plotly)
# Read the raw Twitter data and drop its first two columns.
data <- read.csv("twitter.data.csv")
data <- data[, -(1:2)]

# Perform ordinal MVAOS (Multivariate Analysis with Optimal Scaling),
# keeping six dimensions.
fit.data <- princals(data, ndim = 6)
summary(fit.data)
trans <- fit.data$transform # author's note: new data after rotation
fit.data$loadings # print the loadings

# Object scores as a data frame: new coordinates on the principal
# components, with one name per component.
new.tw <- setNames(
  as.data.frame(fit.data$objectscores),
  c("Racial", "Activism", "Partizan", "Political", "Civil", "Party")
)

plot(fit.data, "screeplot") # scree plot

# rgl scatter plots: components 1-3, then components 4-6.
plot3d(fit.data$objectscores[, 1:3],
       xlab = "Racial", ylab = "Activism", zlab = "Partizan")
plot3d(fit.data$objectscores[, 4:6],
       xlab = "Political", ylab = "Civil", zlab = "Party")

# plotly version of the first three components.
fig1 <- plot_ly(new.tw, x = ~Racial, y = ~Activism, z = ~Partizan) %>%
  add_markers() %>%
  layout(scene = list(
    xaxis = list(title = "Racial"),
    yaxis = list(title = "Activism"),
    zaxis = list(title = "Partizan")
  ))
fig1

# plotly version of the last three components.
fig2 <- plot_ly(new.tw, x = ~Political, y = ~Civil, z = ~Party) %>%
  add_markers() %>%
  layout(scene = list(
    xaxis = list(title = "Political"),
    yaxis = list(title = "Civil"),
    zaxis = list(title = "Party")
  ))
fig2

# Loadings plot and biplot of the fitted model.
plot(fit.data, "loadplot", main = "Loadings Plot Twitter Data")
plot(fit.data, "biplot", main = "Biplot Twitter Data")
编辑 3:我添加了一张图片来说明:在最后三列中,唯一的三元组实际上有 288 个,但正如您可以从图像中看到的,只绘制了大约 78 个。
unique(fit.data$objectscores[,4:6])
D4 D5 D6
1 -1.15671269 -0.71694733 0.77788243
4 -1.02621692 -0.21177451 0.62260402
5 -0.19204458 0.41290096 0.66939998
6 -1.63097964 1.50372485 -2.34610692
8 2.31326205 3.97207352 1.50641686
9 0.18111620 -0.15663681 -1.35856727
10 0.13677591 0.31991195 1.17683296
12 1.01664957 0.23790283 -0.28140100
25 -0.15744958 -0.25383428 1.59358003
33 2.54459730 -2.87594628 0.81783240
40 0.01738647 -0.22521022 -1.09709860
48 -1.36614373 -0.07883616 2.54438101
51 1.31399894 3.50896047 0.69071926
80 0.27039555 3.52239618 2.41042189
96 -2.34122650 -1.31832018 -3.02243232
98 -0.14311675 -0.68745042 -1.31840645
149 -2.20031608 -0.70351162 2.49758506
165 0.19855888 -0.22754270 -1.39564370
184 2.10383101 4.61018470 3.27291544
234 -0.86248719 -0.14320110 0.36113536
240 -0.02695382 0.25133854 1.43830163
287 0.43412528 3.59096959 2.14895323
385 -0.63171653 1.96683790 -1.53040932
430 2.33516626 -2.23783510 2.58433098
465 -1.23564797 0.42633667 2.38910261
567 0.80721853 0.87601401 1.48509758
622 -0.35254779 -0.04933924 0.44809213
665 1.18037930 0.30647624 -0.54286967
671 -0.02831485 0.48147437 0.40793131
698 2.70832703 -2.80737286 0.55636374
830 -2.67458303 1.51716057 -0.62640429
932 1.47772867 3.57753388 0.42925060
957 -0.14311675 -0.68745042 -1.31840645
1148 -1.84041068 2.14183603 -0.57960834
1390 0.18111620 -0.15663681 -1.35856727
1392 -0.06953126 3.65533453 4.33219888
1413 0.18598433 -3.96905623 -1.10750131
1469 1.01664957 0.23790283 -0.28140100
1527 -1.15671269 -0.71694733 0.77788243
1542 0.13989979 3.01722336 2.56570030
1761 1.37049815 -3.36768339 2.69281344
1950 0.09555950 3.49377212 5.10110053
2008 -2.20031608 -0.70351162 2.49758506
2098 1.26829763 4.21564506 2.19574918
2284 3.84120978 0.85822441 2.60565027
2343 0.27039555 3.52239618 2.41042189
2390 3.54386041 -2.41283322 1.63353000
2426 1.26965866 3.98550923 3.22611949
2436 -1.46724991 1.57229826 -2.60757559
2451 2.54459730 -2.87594628 0.81783240
2580 0.20178539 -0.62120949 -1.87842022
2881 -1.02621692 -0.21177451 0.62260402
2928 3.54386041 -2.41283322 1.63353000
3219 -1.04461403 2.41585051 -1.23461445
3221 -2.80507879 1.01198774 -0.47112589
3366 1.10456790 4.14707165 2.45721784
3457 2.47699178 4.04064693 1.24494820
3492 -1.11313465 3.66877025 6.05190151
3911 -0.23638486 0.88944972 3.20480021
4322 -1.07191824 0.49491008 2.12763394
4347 1.31399894 3.50896047 0.69071926
4809 -2.51085330 1.58573398 -0.88787296
4848 0.34971406 -3.90048282 -1.36896997
5034 0.97094826 0.94458742 1.22362891
5143 1.66472364 -2.79393715 2.27606637
5511 0.36228861 -0.15896929 -1.65711237
6081 1.01987609 -0.15576396 -0.76417752
6512 1.50099391 -2.86251056 2.53753503
6814 -0.88088430 2.48442393 -1.49608311
7164 -0.85761906 -3.95562052 0.61220132
7182 -0.86248719 -0.14320110 0.36113536
7614 2.50025702 -2.39939751 3.35323263
7716 0.02061298 -0.61887701 -1.57987512
7748 -1.63097964 1.50372485 -2.34610692
7816 0.18598433 -3.96905623 -1.10750131
7908 0.03805566 -0.68978290 -1.61695155
8474 -2.17749677 -1.24974677 -3.28390098
8807 0.03805566 -0.68978290 -1.61695155
9533 3.33442937 -1.77472205 3.40002858
10256 -1.46724991 1.57229826 -2.60757559
10488 -1.20105297 -0.24039857 3.31328266
10906 -1.37797055 5.25133126 1.16141357
11444 -2.55065755 -0.68020900 -1.25593373
12051 2.38409409 -3.33818648 0.59652455
13041 -0.36688063 0.38427690 3.36007861
13143 0.85614636 -0.22433737 -0.50270885
13573 1.18037930 0.30647624 -0.54286967
13730 0.85614636 -0.22433737 -0.50270885
14327 0.19855888 -0.22754270 -1.39564370
14472 2.72899623 -3.27194554 0.03651079
14594 -0.97554028 -0.71927981 0.47933733
14746 0.64671531 0.41377381 1.26378973
15290 2.36976125 -2.90457034 3.50851104
15358 0.02061298 -0.61887701 -1.57987512
15380 -1.31721591 -1.17918753 0.55657458
15673 1.06022762 4.62362041 4.99261807
> length(unique(fit.data$objectscores[,4:6]))
[1] 288
无论如何,jitter() 似乎工作正常,非常感谢。我还是不明白为什么我看不到全部 288 个点。它们是否因为太相似而无法被绘制成不同的点?我可以放大比例吗?我也尝试过将数据集乘以 10 和 100,但无济于事。
编辑 4:实际上,手动重新数了一下 unique(fit.data$objectscores[,4:6]) 中的值,似乎是 95 条记录而不是 288 条。我想 length(unique(fit.data$objectscores[,4:6])) 不是计算它们的正确方法:对矩阵使用 length() 返回的是元素总数(行数 × 列数),要统计唯一的行数应改用 nrow(unique(fit.data$objectscores[,4:6]))。