为此,我发现先把因子转换成数字会比较方便。这是我的第一步:考虑到需要比较的列数较多,这种做法看起来没有那么费劲。
为此,我使用了一个 `for` 循环和 `qdap` 包:每次匹配(match)到某个取值时,先把它替换为 `NA`,再用 `qdap::NAer()` 把这些 `NA` 替换成对应的数字编码。
library(dplyr)
library(qdap)
# Example data: three rows, two numeric columns (Su, Score) followed by five
# columns of ROI labels.
df <- data.frame(
  Su = rep(1, times = 3),
  Score = rep(90, times = 3),
  ROI = c("A", "A", "B"),
  NETX_ROI = c("A", "B", "C"),
  third_roi = rep("B", times = 3),
  four_roi = c("B", "B", "A"),
  five_roi = c("B", "B", "D")
)
df
> df
Su Score ROI NETX_ROI third_roi four_roi five_roi
1 1 90 A A B B B
2 1 90 A B B B B
3 1 90 B C B A D
# Recode the ROI labels as numeric codes, working on a copy so that `df`
# keeps the original labels.
df2 <- df
# Known ROI labels; a label's position in this vector is its numeric code.
roi <- c("A", "B", "C", "D")
# A = Elsewhere
# B = Teacher
# C = Pen
# D = Smartboard
# Kept from the original script in case later code refers to `n`.
n <- seq(1, length.out = length(roi))
# Base-R equivalent of the for loop + qdap::NAer() recoding: match() maps
# every label to its position in `roi` in one pass. Unlike replacing values
# with NA and then filling all NAs, this leaves pre-existing missing values
# as NA and does not touch the non-label columns (Su, Score).
is_label <- vapply(
  df2,
  function(col) is.character(col) || is.factor(col),
  logical(1)
)
label_cols <- names(df2)[is_label]
if (length(label_cols) > 0) {
  labels_seen <- unique(unlist(lapply(df2[label_cols], as.character)))
  unknown <- setdiff(labels_seen, c(roi, NA))
  if (length(unknown) > 0) {
    # match() returns NA for anything absent from `roi`; say so explicitly
    # instead of letting such labels disappear silently.
    warning(
      "Labels not listed in `roi` are recoded as NA: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  df2[label_cols] <- lapply(
    df2[label_cols],
    function(col) match(as.character(col), roi)
  )
}
> df2
Su Score ROI NETX_ROI third_roi four_roi five_roi
1 1 90 1 1 2 2 2
2 1 90 1 2 2 2 2
3 1 90 2 3 2 1 4
# Keep only the ROI columns and switch to a matrix so that whole blocks of
# columns can be compared at once.
df2 <- df2 %>%
  dplyr::select(-c(Su, Score)) %>%
  as.matrix()
nn <- ncol(df2)
# Compare every column with its right-hand neighbour in a single vectorised
# step. seq_len() keeps the indices valid when only one column is left
# (1:(nn - 1) would yield c(1, 0) and index a non-existent column).
left_cols <- df2[, seq_len(max(nn - 1L, 0L)), drop = FALSE]
right_cols <- df2[, seq_len(nn)[-1], drop = FALSE]
# NA flags a pair of adjacent columns holding the same value, 0 a pair that
# differs. A comparison involving a missing value is itself NA and is
# therefore flagged as well.
x <- ifelse(left_cols == right_cols, NA, 0)
# Drop the column names inherited from `df2` so that as.data.frame(x) yields
# default V1..Vk names, exactly like a matrix created with matrix().
dimnames(x) <- NULL
> x
[,1] [,2] [,3] [,4]
[1,] NA 0 NA NA
[2,] 0 NA NA NA
[3,] 0 0 0 0
最后,我只需删除含有 `NA` 的行即可。
# Turn the flag matrix into a data frame so it can be bound to `df`.
dfx <- as.data.frame(x)
# Attach the flag columns, drop every row that contains an NA anywhere, then
# keep only the leading columns, i.e. the original columns of `df`.
df_test <- dplyr::select(
  na.omit(dplyr::bind_cols(df, dfx)),
  1:ncol(df)
)
df_test
> df_test
Su Score ROI NETX_ROI third_roi four_roi five_roi
3 1 90 B C B A D