0

我的问题与在 dplyr 中进行循环有关。我想针对自变量(IV1:IV5)的每个唯一组合,计算 DV1 的平均值、计数和方差。我的数据如下所示:

DV1     IV1     IV2     IV3     IV4  IV5
506.2   Male    Canada  alpha   low  Orange
418.7   Female  Canada  beta    mid  Orange
380.3   Male    Canada  alpha   high Red
347.3   Male    Canada  alpha   mid  Red
241.6   Female  UStates alpha   mid  Blue
223.7   Female  Canada  beta    low  Green
220.9   Male    Canada  alpha   low  Orange
201.1   Male    Canada  alpha   low  Red
193.8   Female  Canada  beta    mid  Green
179.7   Female  UStates beta    low  Orange
170.7   Male    UStates beta    mid  Blue
149.5   Male    UStates beta    low  Green
146.2   Male    Canada  beta    high Green
144.2   Female  UStates beta    mid  Red
141.5   Male    Canada  beta    high Blue
138.6   Male    Canada  beta    mid  Blue
137     Male    Canada  beta    low  Red
136     Female  UStates beta    mid  Orange
135.9   Female  UStates beta    low  Red
134.6   Male    UStates alpha   mid  Orange
129     Female  UStates beta    mid  Green
127.1   Female  UStates beta    low  Green
120.4   Male    UStates beta    low  Blue
119.3   Female  UStates beta    high Red
118.6   Female  Canada  alpha   low  Blue
116.2   Female  Canada  alpha   high Green
113.7   Male    UStates beta    high Green
112.9   Female  UStates beta    low  Green
112.6   Male    Canada  alpha   mid  Green
112.2   Male    Canada  alpha   mid  Orange
109     Female  Canada  beta    high Orange
108.1   Female  Canada  alpha   mid  Blue
99.1    Female  Canada  alpha   high Blue
95.6    Male    UStates beta    mid  Green
88.1    Male    Canada  alpha   high Blue
83.9    Female  Canada  beta    high Green
83.7    Male    Canada  alpha   low  Green
80.8    Male    Canada  alpha   high Orange
79.9    Female  UStates alpha   high Blue
78      Female  UStates alpha   mid  Red
76.3    Female  UStates alpha   low  Blue
74.1    Female  UStates beta    high Orange
65.7    Female  UStates beta    high Red
62.1    Male    UStates alpha   high Red
54.8    Male    Canada  beta    low  Blue
54      Male    UStates alpha   mid  Red
42.8    Female  UStates alpha   low  Red
39.6    Male    UStates alpha   high Orange
19.5    Male    UStates alpha   low  Orange
19.2    Female  Canada  alpha   mid  Green

使用 dplyr,我已经写出了下面的代码。对于 IV1:IV5 的每个唯一组合,我都需要得到代码中 "Descriptive statistics" 注释下面计算的那些信息。

# Per-level summary of DV1 when grouping by IV1 alone:
# group mean, group size and group variance.
mod1 <- data1 %>%
  group_by(IV1) %>%
  summarise(
    avg_banding = mean(DV1),
    total.count = n(),
    variance = var(DV1)
  )
# Show up to 50 rows of the summary table
print(mod1, n = 50)
# Descriptive statistics for the entire group (required!)
sum(mod1[["avg_banding"]])
mean(mod1[["avg_banding"]])
mean(mod1[["total.count"]])
# Keep only rows without missing values before summing the variances
# (var() is NA for a group with a single observation)
mod1_2 <- mod1[complete.cases(mod1), ]
sum(mod1_2[["variance"]])

正如您可能看出的那样,我必须对每种可能的组合都这样做……(IV1)、(IV1,IV2)、(IV1,IV2,IV3)、(IV2,IV3)等。组合非常多,我想知道是否有更简单的方法,而不必逐个手动代入每一种组合!我尝试过使用 for 循环,但没有成功。

# Per-level summary of DV1 when grouping by IV2 alone:
# group mean, group size and group variance.
mod2 <- data1 %>%
  group_by(IV2) %>%
  summarise(
    avg_banding = mean(DV1),
    total.count = n(),
    variance = var(DV1)
  )
# Show up to 50 rows of the summary table
print(mod2, n = 50)

# Aggregate the per-group figures across all IV2 levels
sum(mod2[["avg_banding"]])
mean(mod2[["avg_banding"]])
mean(mod2[["total.count"]])
# Keep only rows without missing values before summing the variances
# (var() is NA for a group with a single observation)
mod2_2 <- mod2[complete.cases(mod2), ]
sum(mod2_2[["variance"]])

非常感谢任何帮助!谢谢!

4

3 回答 3

3

您可以使用 dplyr 和 tidyr 来做到这一点。先用 crossing(下面的代码中使用的是 expand)得到输入数据框中各变量取值的所有组合,再与原始数据做 left_join,然后 group_by 并 summarize。我的示例数据只取了 10 行。下次请记得附上一个可重现的示例,其中的数据用 dput 生成,以便轻松输入到 R 中。

library(dplyr)
library(tidyr) #For expand

# Ten-row sample of the question's data, written column by column:
# DV1 is the numeric response, IV1..IV5 are the categorical predictors.
data <- tibble::tibble(
  DV1 = c(506.2, 418.7, 380.3, 347.3, 241.6, 223.7, 220.9, 201.1, 193.8, 179.7),
  IV1 = c("Male", "Female", "Male", "Male", "Female",
          "Female", "Male", "Male", "Female", "Female"),
  IV2 = c("Canada", "Canada", "Canada", "Canada", "UStates",
          "Canada", "Canada", "Canada", "Canada", "UStates"),
  IV3 = c("alpha", "beta", "alpha", "alpha", "alpha",
          "beta", "alpha", "alpha", "beta", "beta"),
  IV4 = c("low", "mid", "high", "mid", "mid",
          "low", "low", "low", "mid", "low"),
  IV5 = c("Orange", "Orange", "Red", "Red", "Blue",
          "Green", "Orange", "Red", "Green", "Orange")
)

# Summarise DV1 for every possible combination of the IV1..IV5 levels,
# including combinations that never occur in `data`.
expand(data, IV1, IV2, IV3, IV4, IV5) %>%
  # Name the join keys explicitly so the join does not depend on whichever
  # columns the two tables happen to share.
  left_join(data, by = c("IV1", "IV2", "IV3", "IV4", "IV5")) %>%
  group_by(IV1, IV2, IV3, IV4, IV5) %>%
  summarize(mean = mean(DV1, na.rm = TRUE),
            var = var(DV1, na.rm = TRUE),
            # A combination with no observations still has one row after the
            # left join (DV1 = NA), so n() would report 1 for it. Count the
            # non-missing DV1 values instead, which gives 0 for such rows.
            n = sum(!is.na(DV1))) %>%
  # Return a plain, ungrouped table
  ungroup()
于 2018-04-17T20:09:03.777 回答
1

一种方法可能是创建一个记录不同模式的变量。我将每一列重新编码为二进制 0,1 格式,然后将它们连接起来。结果变量pattern是每个组合的唯一“代码”。

library(psych)

# Recode each independent variable as a 0/1 indicator column.
# NOTE(review): IV4 (low/mid/high) and IV5 (Orange/Red/Blue/Green) have more
# than two levels in the question's data, so these indicators only separate
# "low" vs. not-low and "Red" vs. not-Red.
df$male <- ifelse(df$IV1 == "Male", 1, 0)
df$ustates <- ifelse(df$IV2 == "UStates", 1, 0)
df$alpha <- ifelse(df$IV3 == "alpha", 1, 0)
df$low <- ifelse(df$IV4 == "low", 1, 0)
# The data spells the level "Red"; comparing against "red" never matches.
df$red <- ifelse(df$IV5 == "Red", 1, 0)

# Concatenate the indicators into one "code" per pattern. The columns are
# referenced through df directly, so attach(df) is not needed.
df$pattern <- paste(df$male, df$ustates, df$alpha, df$low, df$red)

# Descriptive statistics of DV1 within each pattern
# (describeBy is the current name of the deprecated describe.by).
describeBy(df$DV1, df$pattern)
于 2018-04-17T19:45:09.553 回答
1

这个怎么样:

library(HapEstXXR)
library(dplyr)

# Build the example data set: one numeric response (DV1) and five
# categorical predictors (IV1..IV5), 19 rows.
data1 <- data.frame(
  DV1 = c(506.2, 418.7, 380.3, 347.3, 241.6, 223.7, 220.9, 201.1, 193.8,
          179.7, 170.7, 149.5, 146.2, 144.2, 141.5, 138.6, 137, 136, 135.9),
  IV1 = c("Male", "Female", "Male", "Male", "Female", "Female", "Male",
          "Male", "Female", "Female", "Male", "Male", "Male", "Female",
          "Male", "Male", "Male", "Female", "Female"),
  IV2 = c("Canada", "Canada", "Canada", "Canada", "UStates", "Canada",
          "Canada", "Canada", "Canada", "UStates", "UStates", "UStates",
          "Canada", "UStates", "Canada", "Canada", "Canada", "UStates",
          "UStates"),
  IV3 = c("alpha", "beta", "alpha", "alpha", "alpha", "beta", "alpha",
          "alpha", "beta", "beta", "beta", "beta", "beta", "beta", "beta",
          "beta", "beta", "beta", "beta"),
  IV4 = c("low", "mid", "high", "mid", "mid", "low", "low", "low", "mid",
          "low", "mid", "low", "high", "mid", "high", "mid", "low", "mid",
          "low"),
  IV5 = c("Orange", "Orange", "Red", "Red", "Blue", "Green", "Orange", "Red",
          "Green", "Orange", "Blue", "Green", "Green", "Red", "Blue", "Blue",
          "Red", "Orange", "Red")
)

# Names of the grouping columns: "IV1" through "IV5"
cols <- paste0("IV", 1:5)
# Powerset of those column names, built with powerset()
cols_combos <- powerset(cols)

# Empty data frame that will collect the statistics of each combination
stats_df <- data.frame()

# Calculate summary statistics of DV1 for one combination of grouping columns
# and append them to the results collected so far.
#
# Args:
#   data1:    data frame containing DV1 and the columns named in `cols`.
#   stats_df: data frame of previously collected results (may be empty).
#   cols:     character vector with the names of the columns to group by.
#
# Returns:
#   `stats_df` with one row appended. The row holds COMBO (the column names
#   joined by ","), sum_avg and avg_avg (sum and mean of the per-group means
#   of DV1) and avg_cnt (mean number of rows per group).
getStats = function(data1, stats_df, cols) {
  mod = data1 %>%
    # group_by_(.dots = ) is deprecated; across(all_of()) is the supported
    # way to group by column names held in a character vector.
    group_by(across(all_of(cols))) %>%
    summarise(avg_banding = mean(DV1),
              total.count = n()) %>%
    # Drop any remaining grouping so the next summarise collapses the
    # per-group rows into a single row for this combination.
    ungroup() %>%
    summarise(COMBO = paste0(cols, collapse = ","),
              sum_avg = sum(avg_banding),
              avg_avg = mean(avg_banding),
              avg_cnt = mean(total.count))

  rbind(stats_df, mod)
}

# Fold getStats over every column combination, threading the accumulated
# results through as the running value (starting from the current stats_df)
stats_df <- Reduce(
  function(acc, combo) getStats(data1, acc, combo),
  cols_combos,
  stats_df
)

需要包 HapEstXXR 才能创建所有列组合的 powerset。结果存储在名为 stats_df 的数据框中。

于 2018-04-17T20:50:20.580 回答