1

我有以下 data.frame(抱歉,数据量比较大):

df <- structure(list(X1 = c("sol_10035", "sol_10040", "sol_10041", 
"sol_10044", "sol_10045", "sol_10047", "sol_10048", "sol_10049", 
"sol_10051", "sol_10175", "sol_10177", "sol_10178", "sol_10182", 
"sol_10183", "sol_10184", "sol_10185", "sol_10187", "sol_10191", 
"sol_10267", "sol_10268", "sol_10268", "sol_10270", "sol_10271", 
"sol_10279", "sol_10282", "sol_10286", "sol_10291", "sol_10299", 
"sol_10312", "sol_10316", "sol_10328", "sol_10331", "sol_10334", 
"sol_10334", "sol_10341", "sol_10344", "sol_10373", "sol_10375", 
"sol_10379", "sol_10393", "sol_10426", "sol_10455", "sol_10456", 
"sol_10457", "sol_10459", "sol_10461", "sol_10493", "sol_10493", 
"sol_10494", "sol_10508", "sol_10529", "sol_10532", "sol_10533", 
"sol_10538", "sol_10554", "sol_10556", "sol_10562", "sol_10563", 
"sol_10566", "sol_10573", "sol_10575", "sol_10575", "sol_10586", 
"sol_10588", "sol_10588", "sol_10596", "sol_10602", "sol_10605", 
"sol_10607", "sol_10608", "sol_10613", "sol_10642", "sol_10655", 
"sol_10692", "sol_10709", "sol_10710", "sol_10711", "sol_10711", 
"sol_10720", "sol_10723", "sol_10035", "sol_10040", "sol_10041", 
"sol_10044", "sol_10045", "sol_10047", "sol_10048", "sol_10049", 
"sol_10051", "sol_10175", "sol_10177", "sol_10178", "sol_10182", 
"sol_10183", "sol_10184", "sol_10185", "sol_10187", "sol_10191", 
"sol_10267", "sol_10268", "sol_10268", "sol_10270", "sol_10271", 
"sol_10279", "sol_10282", "sol_10286", "sol_10291", "sol_10299", 
"sol_10312", "sol_10316", "sol_10328", "sol_10331", "sol_10334", 
"sol_10334", "sol_10341", "sol_10344", "sol_10373", "sol_10375", 
"sol_10379", "sol_10393", "sol_10426", "sol_10455", "sol_10456", 
"sol_10457", "sol_10459", "sol_10461", "sol_10493", "sol_10493", 
"sol_10494", "sol_10508", "sol_10529", "sol_10532", "sol_10533", 
"sol_10538", "sol_10554", "sol_10556", "sol_10562", "sol_10563", 
"sol_10566", "sol_10573", "sol_10575", "sol_10575", "sol_10586", 
"sol_10588", "sol_10588", "sol_10596", "sol_10602", "sol_10605", 
"sol_10607", "sol_10608", "sol_10613", "sol_10642", "sol_10655", 
"sol_10692", "sol_10709", "sol_10710", "sol_10711", "sol_10711", 
"sol_10720", "sol_10723", "sol_10035", "sol_10040", "sol_10041", 
"sol_10044", "sol_10045", "sol_10047", "sol_10048", "sol_10049", 
"sol_10051", "sol_10175", "sol_10177", "sol_10178", "sol_10182", 
"sol_10183", "sol_10184", "sol_10185", "sol_10187", "sol_10191", 
"sol_10267", "sol_10268", "sol_10268", "sol_10270", "sol_10271", 
"sol_10279", "sol_10282", "sol_10286", "sol_10291", "sol_10299", 
"sol_10312", "sol_10316", "sol_10328", "sol_10331", "sol_10334", 
"sol_10334", "sol_10341", "sol_10344", "sol_10373", "sol_10375", 
"sol_10379", "sol_10393", "sol_10426", "sol_10455", "sol_10456", 
"sol_10457", "sol_10459", "sol_10461", "sol_10493", "sol_10493", 
"sol_10494", "sol_10508", "sol_10529", "sol_10532", "sol_10533", 
"sol_10538", "sol_10554", "sol_10556", "sol_10562", "sol_10563", 
"sol_10566", "sol_10573", "sol_10575", "sol_10575", "sol_10586", 
"sol_10588", "sol_10588", "sol_10596", "sol_10602", "sol_10605", 
"sol_10607", "sol_10608", "sol_10613", "sol_10642", "sol_10655", 
"sol_10692", "sol_10709", "sol_10710", "sol_10711", "sol_10711", 
"sol_10720", "sol_10723"), model = c("rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", 
"rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "rf", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", "pls", 
"pls", "pls", "pls", "pls", "pls", "pls", "pls", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", "svm", 
"svm", "svm", "svm", "svm", "svm", "svm"), res = c(-1.4815505185651, 
-1.38155061393253, -1.45695583107802, -1.13051971261767, -1.28695599320266, 
-1.42415370000154, -0.99233908412686, -2.78064082881161, -1.82675122357051, 
-2.36807374057301, -2.3320723441529, -2.67207249674079, -3.31455893287706, 
-2.17263806652157, -2.13263810466854, -2.52554767470598, -2.09962849509049, 
-2.5455476556325, -1.48578696223593, -1.90628487150221, -1.90628487150221, 
-1.83127686589692, -1.40744484767072, -0.91430778181862, -1.66578696938849, 
-3.29973012545368, -2.20973021128436, -1.75570400582873, -2.34472653634494, 
-1.99701600927021, -2.17770528855383, -1.88710810574164, -1.08528036261529, 
-1.08528036261529, -1.8554178798845, -0.989835553152767, -2.56453793602913, 
-2.15996905825455, -2.99320911981941, -3.77973014452716, -1.65290958267842, 
-2.08474045490216, -2.51747024189273, -2.84822377084607, -3.25351251761294, 
-0.379912223052841, -1.30992312476689, -1.31894530473519, -0.519820773061511, 
0.383320568439466, -0.678032397099017, -1.80502645861624, -1.28298059315766, 
-0.686698607372736, -2.53445717290867, -1.81877430887043, -1.42311824357665, 
-3.09851087636666, -0.730614682865185, -1.35208143616296, -4.06605176199527, 
-4.06605176199527, -1.53819901592959, -2.54999963642241, -2.54999963642241, 
-1.11960413826569, -1.6918623298347, -1.6464594507407, -2.51490227966849, 
-2.1858871665161, -2.40837297362259, -1.67419013858732, -2.27738982317886, 
-3.92465032506434, -1.68014547664006, -1.60479028580843, -1.16098422658784, 
-1.16098422658784, -2.2860700469459, -2.60339279078186, -1.60446374173762, 
-1.50446383710505, -1.62990791516413, -1.77577097462087, -1.45990807728876, 
-1.63221493851451, -1.63545023911412, -2.62265076737661, -1.6648058229937, 
-2.40231239648778, -2.26302627866696, -2.60302643125485, -3.48192173062671, 
-2.3352390128172, -2.29523905096417, -2.36199830161037, -2.53905017090851, 
-2.38199828253689, -1.77583826558564, -1.82565434372762, -1.82565434372762, 
-1.65744466890755, -1.93251832799871, -1.74661956546712, -1.9558382727382, 
-2.84900758111655, -1.75900766694724, -2.96909312698811, -2.23284694934467, 
-2.46273221150808, -2.99385247608151, -2.11133100195475, -1.29467770150081, 
-1.29467770150081, -2.61167137193353, -2.24234236453998, -2.4301420554697, 
-2.24703874068304, -2.11098856745551, -3.32900760019004, -2.04174546109205, 
-2.55424635828313, -2.3291688314616, -3.11767207564349, -2.73679776365421, 
-0.679498892706623, -1.34569547401812, -1.54652695175621, -2.28126853296633, 
-0.988321000150492, -0.845828268813766, -1.76782259784989, -1.37918084720446, 
-1.45573186892023, -2.64300591408346, -2.33225200577483, -2.00108102566546, 
-2.70721355577772, -0.871369935818877, -1.36107282725622, -2.90317245640462, 
-2.90317245640462, -2.55035817000526, -2.36382423373917, -2.36382423373917, 
-1.27272272089218, -1.45480361979991, -2.08697980861596, -2.297462776927, 
-1.81711901055862, -3.17543198941651, -1.6468599993301, -1.54404294919475, 
-2.21224776646234, -2.27149198369245, -1.31683176214295, -1.68957940184567, 
-1.68957940184567, -2.52600381307447, -2.46727009516503, -1.23762900746866, 
-1.13762910283609, -1.35660114733054, -1.42351638133134, -1.18660130945517, 
-1.45345732465512, -1.35799257070317, -2.05963401620835, -1.78095813578771, 
-2.00463255794565, -2.34118401549464, -2.68118416808253, -3.45959350138887, 
-2.40278974294777, -2.36278978109474, -2.47953319694026, -2.48045707522502, 
-2.49953317786677, -1.52009096893243, -1.95148276380236, -1.95148276380236, 
-1.67193637764583, -1.86837159213928, -0.980999011583017, -1.70009097608499, 
-3.17227187908491, -2.0822719649156, -2.45108936046996, -2.31859767373448, 
-2.32065887524153, -2.73787934669907, -2.00854859174467, -1.31602876206132, 
-1.31602876206132, -2.45550132402396, -2.15815656904391, -2.43147927679445, 
-2.10527139763203, -2.12614206614536, -3.65227189815839, -1.9167735566624, 
-1.85327916244403, -2.08779642509345, -3.22870502746881, -3.02231583272168, 
0.125904142452702, -1.35141078088838, -1.53382318262665, -1.68754873015187, 
-1.06319415541817, -0.76875543071585, -1.66869224278363, -1.31256363602869, 
-1.47844075679374, -2.44956261495404, -2.00040525386917, -1.926297476183, 
-3.02142554128199, -0.382876185263787, -1.25015240078615, -2.75634025399193, 
-2.75634025399193, -2.05201324833915, -2.46440533860831, -2.46440533860831, 
-1.26659002935118, -1.59338585783013, -1.87609295593782, -2.11973415323673, 
-1.94972130042437, -2.86662498733815, -1.57525770399891, -1.78737449864409, 
-2.15571566033041, -2.12772455431051, -1.50613298243881, -1.70884508099009, 
-1.70884508099009, -2.44962268274181, -2.43040932407241), X2 = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("X1", "model", "res", "X2"
), row.names = c(472L, 475L, 476L, 479L, 480L, 481L, 482L, 483L, 
484L, 487L, 488L, 489L, 490L, 491L, 492L, 493L, 495L, 498L, 501L, 
502L, 503L, 504L, 505L, 506L, 507L, 509L, 514L, 515L, 516L, 517L, 
518L, 522L, 525L, 526L, 529L, 530L, 533L, 534L, 535L, 536L, 542L, 
549L, 550L, 551L, 552L, 554L, 556L, 557L, 558L, 567L, 577L, 582L, 
583L, 586L, 597L, 598L, 602L, 603L, 606L, 608L, 609L, 610L, 611L, 
612L, 613L, 617L, 618L, 619L, 620L, 621L, 624L, 645L, 650L, 659L, 
666L, 667L, 668L, 669L, 672L, 673L, 1146L, 1149L, 1150L, 1153L, 
1154L, 1155L, 1156L, 1157L, 1158L, 1161L, 1162L, 1163L, 1164L, 
1165L, 1166L, 1167L, 1169L, 1172L, 1175L, 1176L, 1177L, 1178L, 
1179L, 1180L, 1181L, 1183L, 1188L, 1189L, 1190L, 1191L, 1192L, 
1196L, 1199L, 1200L, 1203L, 1204L, 1207L, 1208L, 1209L, 1210L, 
1216L, 1223L, 1224L, 1225L, 1226L, 1228L, 1230L, 1231L, 1232L, 
1241L, 1251L, 1256L, 1257L, 1260L, 1271L, 1272L, 1276L, 1277L, 
1280L, 1282L, 1283L, 1284L, 1285L, 1286L, 1287L, 1291L, 1292L, 
1293L, 1294L, 1295L, 1298L, 1319L, 1324L, 1333L, 1340L, 1341L, 
1342L, 1343L, 1346L, 1347L, 1820L, 1823L, 1824L, 1827L, 1828L, 
1829L, 1830L, 1831L, 1832L, 1835L, 1836L, 1837L, 1838L, 1839L, 
1840L, 1841L, 1843L, 1846L, 1849L, 1850L, 1851L, 1852L, 1853L, 
1854L, 1855L, 1857L, 1862L, 1863L, 1864L, 1865L, 1866L, 1870L, 
1873L, 1874L, 1877L, 1878L, 1881L, 1882L, 1883L, 1884L, 1890L, 
1897L, 1898L, 1899L, 1900L, 1902L, 1904L, 1905L, 1906L, 1915L, 
1925L, 1930L, 1931L, 1934L, 1945L, 1946L, 1950L, 1951L, 1954L, 
1956L, 1957L, 1958L, 1959L, 1960L, 1961L, 1965L, 1966L, 1967L, 
1968L, 1969L, 1972L, 1993L, 1998L, 2007L, 2014L, 2015L, 2016L, 
2017L, 2020L, 2021L), class = "data.frame")

我使用以下代码绘图,得到了下面这张图:

# Box plots of `res` for each model at every X2 level, with all observations
# overlaid as points. Uses `df`, the data frame defined above.
library(ggplot2)

ggplot(df, aes(x = factor(X2), y = res)) +
  # Light grey horizontal reference lines at every integer from -6 to 2.
  geom_hline(yintercept = -6:2, color = "grey") +
  # One box per model; the fill aesthetic places the boxes side by side.
  geom_boxplot(aes(fill = factor(model))) +
  # theme()/element_text() replace the removed opts()/theme_text() API.
  theme(axis.text.x = element_text(angle = 45)) +
  # This layer has no grouping or dodging, so the raw points are drawn at
  # the centre of each x level rather than over their own model's box.
  geom_point()

在此处输入图像描述

如果我理解正确,所有点都应该沿着中间的绿色箱体显示。为什么蓝色箱体上方多了一个点?是数据有问题吗?我想不明白。我有很多这样的图,其中都有一些点出现在红色或蓝色箱体的上方或下方。

我希望得到以下两种效果之一:1)所有点都沿着中间的绿色箱体排列;或 2)每个点沿着它所对应的箱体排列(即模型 1 的点落在模型 1 的箱体上,依此类推)。

更新:我有一个猜测——多出来的点可能是箱线图自动绘制的异常值(outlier)。

4

2 回答 2

2

是的,这是一个异常值。我应该早点猜到。

这是一个解决方案。也许它对某人有用。

# Same plot as in the question, but with the box plot's own outlier markers
# hidden so that only the points drawn by geom_point() remain visible.
# Uses `df`, the data frame defined in the question.
ggplot(df, aes(x = factor(X2), y = res)) +
  # Light grey horizontal reference lines at every integer from -6 to 2.
  geom_hline(yintercept = -6:2, color = "grey") +
  # outlier.shape = NA is the documented way to suppress outlier points;
  # it replaces the older outlier.size = NA trick.
  geom_boxplot(aes(fill = factor(model)), outlier.shape = NA) +
  # theme()/element_text() replace the removed opts()/theme_text() API.
  theme(axis.text.x = element_text(angle = 45)) +
  # Overlay every observation, including the ones the box plot would have
  # flagged as outliers.
  geom_point()
于 2012-08-20T13:14:12.223 回答
1

如果你把 geom_point() 从绘图代码中删除,就会看到这些点确实是异常值(正如你所怀疑的那样);按照 ?geom_boxplot 的说明,异常值会被单独绘制成点。

至于你为什么要在箱线图之外再把所有数据点都画出来,我就不明白了。

于 2012-08-20T13:09:54.173 回答