0

我正在尝试使用 GGally 包中的 ggpairs 创建散点图矩阵。在我的数据集 tol 中,有几个分类的人口统计变量,还有几个连续变量。我用想要的变量创建了一个数据框,并尝试剔除 NA 值,因为我不断收到此错误:

cor.test.default(x, y, method = method, use = use) 中的错误:没有足够的有限观察值(原始信息:not enough finite observations)

当我不使用美学映射时,散点图矩阵可以正常绘制。即使我手动整理了 csv 文件以确保没有空单元格,我仍然会收到此错误。

这是代码:

# Keep the grouping variable (location) plus the *_avg columns to be plotted.
cs <- tol[c(
  "location", "comp_sat_avg", "burnout_avg", "sec_stress_avg",
  "burnout_ee_avg", "burnout_dp_avg", "burnout_pa_avg", "obs_avg",
  "desc_avg", "aware_avg", "nonjudg_avg", "nonreac_avg", "wkplre_wc_avg",
  "Efficacy_avg", "Lotr_avg", "hsecontrol_avg", "hsemsupport_avg",
  "hsepsupport_avg", "hserole_avg", "hsedemands_avg"
)]
# Drop every row that has an NA in any of the selected columns.
csdata <- na.omit(cs)

# Scatterplot matrix of all columns in csdata, coloured by location;
# the lower panels for continuous pairs use the "smooth" plot type.
ggpairs(
  csdata,
  mapping = ggplot2::aes(color = location),
  lower = list(continuous = "smooth")
) +
  theme_bw()

我还有其他三个分类变量需要单独分组,因此非常感谢任何帮助。

根据 stefan 的评论,这里是我的数据集的一个示例:

# Sample of the data set in dput() form: a tibble with 20 rows, one character
# column (location) and 19 numeric *_avg columns. The trailing na.action
# attribute (class "omit") lists 10 row indices, i.e. this sample had already
# been passed through na.omit() before being dumped.
tol  <- structure(list(location = c("Mukono Health Center IV", "Mukono Health Center IV", 
"Goma Health Center III", "Goma Health Center III", "Goma Health Center III", 
"Kawolo General Hospital", "Kawolo General Hospital", "Mukono Health Center IV", 
"Mukono Health Center IV", "Lwanyonyi VHT", "Mukono Health Center IV", 
"Goma Health Center III", "Mukono Health Center IV", "Mukono Health Center IV", 
"Goma Health Center III", "Mukono Health Center IV", "Mukono Health Center IV", 
"Mukono Health Center IV", "Mukono Health Center IV", "Lwanyonyi VHT"
), comp_sat_avg = c(4.6, 4.9, 4.4, 4.2, 3.7, 4.2, 3, 4.3, 3.8, 
4.4, 2.8, 3.9, 4.7, 4.4, 3.22, 4.6, 1.8, 4.67, 3, 4.8), burnout_avg = c(2.2, 
3.2, 2.1, 2.7, 3.4, 2.1, 3.11, 2.4, 2.6, 2.5, 2.89, 2, 1.8, 1.8, 
2.78, 2.6, 3.5, 2.7, 2.56, 2.1), sec_stress_avg = c(2.6, 1.4, 
2.44, 3.1, 3.5, 2.8, 3.1, 2.4, 3.1, 3.33, 2.56, 1.8, 2.8, 1.9, 
3.1, 2.8, 1.5, 3.8, 3.9, 2.6), burnout_ee_avg = c(2.11, 2.33, 
2.78, 2.67, 4.67, 1.22, 1, 3.33, 1.78, 4.33, 3.33, 1.78, 2.78, 
1.11, 1.67, 2.89, 5.89, 1.78, 3, 0.78), burnout_dp_avg = c(1.6, 
0.4, 1.2, 2.4, 1.8, 0.75, 1.2, 2.8, 0.6, 2.4, 4.2, 2.4, 1.2, 
0.6, 3.8, 3.2, 5.6, 1, 1.6, 0.4), burnout_pa_avg = c(5.13, 5.75, 
4.75, 2.88, 5.25, 4.67, 5.75, 5, 5.5, 5.25, 4.88, 4.5, 3.75, 
4.13, 3.13, 4, 4, 3, 4.88, 5.88), obs_avg = c(3.63, 3.25, 2, 
4.38, 2.88, 4, 3.75, 2.38, 2.13, 2.75, 4.63, 3.88, 3, 2.14, 3.83, 
3.5, 2.25, 2.63, 4.13, 3.88), desc_avg = c(3, 3.38, 4.5, 3.88, 
3.38, 3.13, 3.63, 2.63, 3.75, 4.25, 3.5, 4.38, 2.57, 3.63, 3.25, 
3.63, 3.13, 4.13, 4.25, 3.38), aware_avg = c(2.5, 4.25, 4.63, 
4.25, 4.13, 3.5, 4.13, 3.25, 3.25, 4.75, 4.13, 4.75, 3.5, 3.88, 
2.13, 4.13, 3.5, 4.13, 3.57, 3.25), nonjudg_avg = c(1.88, 3.63, 
4.38, 1.88, 2.63, 3.25, 3, 3, 3.25, 4, 2, 3, 3, 4.88, 1.86, 2.88, 
3.25, 2.5, 2.38, 1.63), nonreac_avg = c(3.71, 3.57, 2.43, 4.29, 
3, 3.43, 3.86, 3.86, 2.86, 4.29, 3.86, 3, 3, 3.14, 4.43, 3.43, 
2.8, 3.71, 3.57, 3.43), wkplre_wc_avg = c(5.07, 6.13, 5.8, 5.27, 
4.33, 6.2, 4.07, 7, 6.27, 2.29, 5.14, 4.4, 4.73, 5.47, 5.07, 
4.93, 3.07, 5.6, 5.73, 4.8), Efficacy_avg = c(4, 1.4, 3.6, 3.1, 
3.1, 2.9, 3.6, 2, 2.5, 3.3, 3.7, 3.6, 1.9, 3.7, 3.5, 3.6, 3.2, 
3.6, 3.5, 3.9), Lotr_avg = c(2.17, 2.33, 3.6, 0.5, 2.67, 1.67, 
3.2, 2.17, 2.5, 3.67, 2.33, 3.67, 1.17, 1.83, 2, 2.67, 1.83, 
2.67, 2.83, 3.5), hsecontrol_avg = c(3.67, 4.5, 3.5, 3.5, 3.17, 
3.83, 4.5, 4.33, 3.83, 3.83, 3.67, 4.67, 4.5, 3.67, 3.83, 3.17, 
3, 4.17, 3.83, 3.17), hsemsupport_avg = c(3.6, 4, 3.2, 3.6, 3.2, 
4.2, 3.6, 4, 3.8, 3.6, 3, 4.2, 3.4, 4.2, 3.8, 3.2, 2.4, 4, 4, 
3.8), hsepsupport_avg = c(3.25, 4, 3.75, 3.5, 3, 4.75, 4.25, 
4.75, 3.75, 3.5, 4.67, 4.25, 3.75, 4, 4, 3.25, 1.5, 4, 4, 4), 
    hserole_avg = c(4.8, 5, 4.4, 4.2, 5, 4, 4, 4.2, 4, 4.6, 4.6, 
    4.8, 4.2, 4.2, 3.2, 4.4, 2.8, 4, 4.2, 5), hsedemands_avg = c(2, 
    3.29, 3.29, 4, 1.86, 3.57, 3.29, 1.71, 3.14, 1.71, 3.71, 
    3.71, 3.43, 3.86, 1.86, 2.71, 4, 3.29, 3.57, 2.57)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`1` = 1L, 
`5` = 5L, `11` = 11L, `15` = 15L, `19` = 19L, `24` = 24L, `27` = 27L, 
`30` = 30L, `46` = 46L, `47` = 47L), class = "omit"))
4

1 回答 1

1

您需要采取两个步骤来解决这个问题。有两个 location 各自只有两个观察值,这对 cor.test.default 来说不够(Pearson 相关至少需要 3 个有限观察值)。请对数据取子集以删除这些观察值:

# Drop the two locations that have only two observations each.
csdata <- csdata %>%
  filter(
    location != "Kawolo General Hospital",
    location != "Lwanyonyi VHT"
  )

但是,此时数据集中仍会保留这两个因子水平,只是各自的观察值为 0 个。使用以下方法将变量 location 重新转换为因子,以去掉这些空水平:

# Rebuild location as a factor so that only the values still present are levels.
csdata[["location"]] <- factor(csdata[["location"]])

现在,带有美学映射的 ggpairs 就可以正常运行了:

# Scatterplot matrix of all columns in csdata, coloured by location;
# the lower panels for continuous pairs use the "smooth" plot type.
ggpairs(
  csdata,
  mapping = ggplot2::aes(color = location),
  lower = list(continuous = "smooth")
) +
  theme_bw()
于 2021-03-11T04:50:33.343 回答