2

我有类似的数据(尽管数据集更大):

  correct.trial1 RT.trial1 correct.trial2 RT.trial2 correct.trial3 RT.trial3
1              1       473              0       337              1       426
2              1       496              1       407              1       421
3              1       368              0       405              1       470
4              0       333              1       475              0       473
5              0       435              0       312              1       402

我们可以用这个来制作这个样本:

# Reproducible 5-row sample: three trials, each with a 0/1 correctness flag
# and a reaction time drawn from 300:500.
set.seed(12)
df <- data.frame(
  correct.trial1 = sample(0:1, 5, replace = TRUE),
  RT.trial1 = sample(300:500, 5, replace = TRUE),
  correct.trial2 = sample(0:1, 5, replace = TRUE),
  RT.trial2 = sample(300:500, 5, replace = TRUE),
  correct.trial3 = sample(0:1, 5, replace = TRUE),
  RT.trial3 = sample(300:500, 5, replace = TRUE)
)

当相邻(左侧)的 starts_with("correct.trial") 列的值为 0 时,我想把 starts_with("RT.trial") 列中对应的值替换为 NA。当然,我可以一次处理一列,例如:

library(dplyr)
# Manual approach: each trial's RT column gets its own ifelse(), which keeps
# the RT only where the paired correct.* flag equals 1 and yields NA otherwise.
df %>%
  mutate(
    RT.trial1 = ifelse(correct.trial1 != 1, NA, RT.trial1),
    RT.trial2 = ifelse(correct.trial2 != 1, NA, RT.trial2),
    RT.trial3 = ifelse(correct.trial3 != 1, NA, RT.trial3)
  )

所以它看起来像这样:

  correct.trial1 RT.trial1 correct.trial2 RT.trial2 correct.trial3 RT.trial3
1              1       473              0        NA              1       426
2              1       496              1       407              1       421
3              1       368              0        NA              1       470
4              0        NA              1       475              0        NA
5              0        NA              0        NA              1       402

但这对于成千上万的列是不切实际的。

问题

如何同时对所有列执行此操作?(注意:我更喜欢 dplyr 的解决方案,并且使用 across 比使用 mutate_at 更可取。)

我的尝试

不确定,但根据这个相关的帖子,它(可能)看起来像这样:

# Asker's tentative sketch, kept exactly as posted.
# NOTE(review): presumably this does not work as intended. vars() builds a
# column-selection spec rather than returning column values, so the `== 0`
# test is not evaluated against the matching correct.trial* column. Confirm
# before reuse.
df %>%
  mutate_at(vars(starts_with("RT.trial")),
  ~ifelse(vars(starts_with("correct.trial"))==0, NA, .x))
4

3 回答 3

2

我们可以重塑为“长”格式,然后进行转换

library(dplyr)
library(tidyr)
# Reshape to long format so each row holds one (correct, RT) pair, blank the
# RT for incorrect trials, then reshape back to the original wide layout.
df %>%
  # Row id so pivot_wider() can reassemble the original rows.
  mutate(rn = row_number()) %>%
  # Split names on the dot: "correct.trial1" -> value column "correct",
  # grp "trial1".
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    names_sep = "\\."
  ) %>%
  # case_when() with no fallback branch yields NA wherever `correct` is 0.
  mutate(RT = case_when(as.logical(correct) ~ RT)) %>%
  pivot_wider(
    names_from = grp,
    values_from = c(correct, RT),
    names_sep = "."
  ) %>%
  # Drop `rn` and restore the original column order; all_of() makes the use
  # of an external character vector explicit.
  select(all_of(names(df)))

-输出

# A tibble: 5 x 6
#  correct.trial1 RT.trial1 correct.trial2 RT.trial2 correct.trial3 RT.trial3
#           <int>     <int>          <int>     <int>          <int>     <int>
#1              0        NA              0        NA              0        NA
#2              1       394              1       458              0        NA
#3              0        NA              1       337              0        NA
#4              1       479              0        NA              0        NA
#5              0        NA              0        NA              0        NA

base R中,这可以以更简单的方式完成

# Logical mask marking the "correct.*" indicator columns; every other column
# is treated as a reaction-time column.
i1 <- grepl("correct", names(df))
# Pair each RT column with its indicator column (both taken in their existing
# left-to-right order) and blank the RT wherever the indicator is 0 or missing.
# Assigning NA in place keeps the RT columns' integer storage, unlike
# multiplying by an NA^mask matrix, which coerces them to double.
df[!i1] <- Map(
  function(rt, ok) {
    rt[is.na(ok) | ok == 0] <- NA
    rt
  },
  df[!i1],
  df[i1]
)

数据

# Fixed example data used for the outputs shown above: three trials, each with
# an integer 0/1 `correct` flag and an integer reaction time. Row names are
# the character labels "1" to "5".
df <- data.frame(
  correct.trial1 = c(0L, 1L, 0L, 1L, 0L),
  RT.trial1 = c(417L, 394L, 345L, 479L, 368L),
  correct.trial2 = c(0L, 1L, 1L, 0L, 0L),
  RT.trial2 = c(382L, 458L, 337L, 406L, 306L),
  correct.trial3 = c(0L, 0L, 0L, 0L, 0L),
  RT.trial3 = c(469L, 364L, 361L, 359L, 309L),
  row.names = c("1", "2", "3", "4", "5")
)
于 2020-12-15T22:37:59.117 回答
2

这也有效。这是使用 across 的最简单方法。

library(tidyverse)

# For each RT.trial<k> column, look up the matching correct.trial<k> column
# and blank the RT where that trial was answered incorrectly.
df %>%
  mutate(
    across(
      starts_with("RT.trial"),
      # Derive the partner column name by swapping the "RT" prefix for
      # "correct", so multi-digit suffixes (trial10, trial123, ...) resolve
      # correctly; taking only the last character of the name would map
      # RT.trial10 to correct.trial0.
      ~ if_else(
        get(str_replace(cur_column(), "^RT", "correct")) == 0,
        NA_integer_,
        .x
      )
    )
  )

这给出了:

  correct.trial1 RT.trial1 correct.trial2 RT.trial2 correct.trial3 RT.trial3
1              1       473              0        NA              1       426
2              1       496              1       407              1       421
3              1       368              0        NA              1       470
4              0        NA              1       475              0        NA
5              0        NA              0        NA              1       402

于 2021-02-07T23:22:22.893 回答
1

如果你想坚持tidyverse这里还有一个选择:

library(dplyr)

# Walk the RT columns and the indicator columns in parallel, blanking each RT
# where its indicator is 0; map2_dfc() binds the results into one tibble.
purrr::map2_dfc(df %>% select(starts_with("RT")),
                df %>% select(starts_with("correct")),
                ~ if_else(.y == 0, NA_integer_, .x)) %>%
  # Re-attach the untouched indicator columns.
  bind_cols(df %>% select(starts_with("correct"))) %>%
  # Restore the original column order of `df`; ordering by the numeric suffix
  # alone would leave each RT column ahead of its indicator column.
  select(all_of(names(df)))

#  correct.trial1 RT.trial1 correct.trial2 RT.trial2 correct.trial3 RT.trial3
#           <int>     <int>          <int>     <int>          <int>     <int>
#1              1       473              0        NA              1       426
#2              1       496              1       407              1       421
#3              1       368              0        NA              1       470
#4              0        NA              1       475              0        NA
#5              0        NA              0        NA              1       402
于 2020-12-16T03:17:23.373 回答