我有一个看起来像这样的数据集,我正在尝试对其执行 Kruskal Wallis 测试
它的r代码如下:
my_data <- read.csv('NBvariants_KWtest.csv',header = TRUE)
head(my_data)
levels(my_data$NaiveBayesvariant)
my_data$NaiveBayesvariant <- ordered(my_data$NaiveBayesvariant,
levels = c("I", "II", "III","IV","V","VI"))
library(dplyr)
group_by(my_data, NaiveBayesvariant) %>%
summarise(
count = n(),
mean = mean(accuracy, na.rm = TRUE),
sd = sd(accuracy, na.rm = TRUE),
median = median(accuracy, na.rm = TRUE),
IQR = IQR(accuracy, na.rm = TRUE)
)
library("ggpubr")
ggboxplot(my_data, x = "NaiveBayesvariant", y = "Accuracy",
color = "NaiveBayesvariant", palette = c("#00AFBB", "#E7B800", "#FC4E07","#00AFBB", "#E7B800", "#FC4E07"),
order = c("I", "II", "III","IV","V","VI"),
ylab = "Accuracy", xlab = "Naive Bayes variant")
ggline(my_data, x = "NaiveBayesvariant", y = "Accuracy",
add = c("mean_se", "jitter"),
order = c("I", "II", "III","IV", "V", "VI"),
ylab = "Naive Bayes variant", xlab = "Accuracy")
kruskal.test(accuracy ~ NaiveBayesvariant, data = my_data)
但是,我收到此错误:
> kruskal.test(accuracy ~ NaiveBayesvariant, data = my_data)
Error in model.frame.default(formula = accuracy ~ NaiveBayesvariant, data = my_data) :
variable lengths differ (found for 'NaiveBayesvariant')
> kruskal.test(accuracy ~ NaiveBayesvariant, data = my_data