2

对于以下数据集

mydata=data.frame(x1_c1=c(1:5),
                  x2_c1=c(2:6),
                  x3_c1=c(3:7),
                  x4_c1=c(4:8),
                  x1_c2=0,
                  x2_c2=0,
                  x3_c2=0,
                  x4_c2=0,
                  x1_c3=c(1:5),
                  x2_c3=c(2:6),
                  x3_c3=c(3:7),
                  x4_c3=c(4:8))

> mydata
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     1     2     3     4     0     0     0     0     1     2     3     4
2     2     3     4     5     0     0     0     0     2     3     4     5
3     3     4     5     6     0     0     0     0     3     4     5     6
4     4     5     6     7     0     0     0     0     4     5     6     7
5     5     6     7     8     0     0     0     0     5     6     7     8

我想从以 ,_c3结尾的变量中减去以 , 结尾的所有变量_c1_c2然后_c3合并所有列。这是一个选项

mydata_update=cbind(mydata[,grep("_c1", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
                    mydata[,grep("_c2", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
                    mydata[,grep("_c3", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ])

预期结果是

> mydata_update
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

任何其他方法都适用。

4

3 回答 3

3

匹配数据和减法部分的前缀,然后相减:

subsel <- endsWith(names(mydata), "_c3")
prefix <- sub("_.+", "", names(mydata))
mydata - mydata[subsel][match(prefix, prefix[subsel])]

#  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
#2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
#3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
#4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
#5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

或者,如果您想生活在边缘,并且您确定您的数据是完整的并且按预期排序:

mydata - as.matrix(mydata[,endsWith(names(mydata), "_c3")])
于 2021-06-03T22:05:42.580 回答
1

我们可以使用split.default根据列名的子字符串将数据分组,然后在每个元素中找到“c3”列listgrep其中包含cbindlistdo.call

out <- do.call(cbind, unname(lapply(split.default(mydata, 
           sub("_.*", "", names(mydata))), 
          function(x) x - x[,grep("_c3", names(x))])))[names(mydata)]

-输出

 out
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

或者我们可以使用tidyverse

library(dplyr)
library(tidyr)
mydata %>% 
     mutate(rn = row_number()) %>% 
     pivot_longer(cols = -rn, names_to = c(".value", "grp"), 
            names_sep = "_") %>% 
     group_by(rn) %>%
     mutate(across(where(is.numeric), ~ . - .[grp == 'c3'])) %>% 
     ungroup %>%
     pivot_wider(names_from = grp, values_from = x1:x4) %>% 
     select(-rn) %>%
     select(names(mydata))

-输出

# A tibble: 5 x 12
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0
于 2021-06-03T21:44:00.340 回答
1

这是使用循环的另一种方法:

sm <- mydata[,grep("_c3",colnames(mydata))]
mydata_update <- mydata
for (i in seq(1,ncol(mydata),ncol(sm))) {
    mydata_update[,i:(i+ncol(sm)-1)] <- mydata_update[,i:(i+ncol(sm)-1)]-sm
}

mydata_update
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

于 2021-06-03T22:05:18.937 回答