由于数据点较少,建议绘制所有的点,而不仅仅是异常值。这有助于看清各个点在箱线图中的分布。
您可以使用 geom_jitter 来做到这一点,但请注意 geom_boxplot 已经为异常值绘制了点,因此为了避免这些点被显示两次,您需要使用 geom_boxplot(outlier.shape = NA) 关闭箱线图的异常值显示。
library("ggplot2")
# Example data: 60 observations of a numeric `value`, each assigned to one of
# two groups ("a" or "b") through the factor `places`.
# The literal is laid out over several lines without backslashes: R has no
# backslash line continuation outside of string literals, an expression simply
# continues on the next line while it is syntactically incomplete.
test <- data.frame(
  value = c(
    3, 5, 3, 3, 6, 4, 5, 4, 6, 4,
    6, 4, 4, 6, 5, 3, 3, 4, 4, 4,
    3, 4, 4, 4, 3, 4, 5, 6, 6, 4,
    3, 5, 4, 6, 5, 6, 4, 5, 5, 3,
    4, 4, 6, 4, 4, 5, 5, 3, 4, 5,
    8, 8, 8, 8, 9, 6, 6, 7, 6, 9
  ),
  places = factor(
    # Integer group codes (1 = "a", 2 = "b") mapped to their labels.
    c("a", "b")[c(
      1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
      1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
      2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
      1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
      1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
      2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L
    )],
    levels = c("a", "b")
  )
)
# Add a numeric `levels` column (10, 20, ..., 60 repeated ten times) that is
# used later to colour the jittered points.
l <- rep(c(10, 20, 30, 40, 50, 60), times = 10)
test$levels <- l
# [1]
# Baseline: a plain boxplot of `value` for each level of `places`.
ggplot(data = test, mapping = aes(x = places, y = value)) +
  geom_boxplot()
# [2]
# Overlay every observation on the boxplot, jittered horizontally only
# (height = 0) so the distribution becomes visible. The boxplot layer still
# draws its own outlier markers here, so outliers appear twice: once from the
# boxplot and once from the jitter layer.
dev.new()
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, height = 0)
# [3]
# Many observations share the same value, so use a wider horizontal jitter to
# reduce overplotting. The boxplot's own outlier markers are switched off
# (outlier.shape = NA) because the jitter layer already draws those points.
dev.new()
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.3, height = 0)
# [4]
# Colour the jittered points by the `levels` column to check whether there is
# a sub-pattern inside the distribution.
dev.new()
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(mapping = aes(colour = levels), width = 0.3, height = 0)
# [5]
# Same as the coloured plot, plus a small amount of vertical jitter
# (height = 0.05); this is a good option for less discrete datasets.
dev.new()
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(mapping = aes(colour = levels), width = 0.3, height = 0.05)
# [6]
# Cautionary example: `height` is deliberately left unset. The jitter then
# falls back to its default of 40% of the resolution of the data in the
# vertical direction, so forgetting height = 0 gives a totally different
# picture.
dev.new()
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2)