0

我在为数据帧编写函数时遇到了两个问题。我经常得到一个带有 2 个变量的数据框,我想将它们重新编码为一个变量。

If V1>0 and V2 <0 then new_variable = "V1>0, V2<0. 

在所有数据框中,我有 V1 和 V2 有不同的名称。

问题1。不知道为什么test_df$newVar,这个函数后得到的只有“C>0,I>0”

#Using test FUN on example data frame
    test_df.afterFUN <- test_fun(test_df, var1 = "V1", var2 = "V2", newVar = "category")

问题2。为什么这个函数“newVar”的最后一个参数没有将名称更改为“类别”?如果我运行适合单个数据框的此函数的代码(重命名变量等),它将起作用并给我想要的东西(看看 test_df2)

rm(list = ls())
    library("dplyr") # for filter
    # Preparing example data frame
    rama <- rbind(c(-5:20, -20:5), c(-20:5, -5:20))
    rama <- t(rama)
    colnames(rama) <- c("V1", "V2")
    test_df <- as.data.frame(rama)

#Test FUN

test_fun <- function(df, var1, var2, newVar) {
  df1 <- filter(df, var1 == 0, var2 == 0)
  df1  <- mutate(df1, newVar = "C=0, I=0")
  df2 <- filter(df, var1 == 0, var2  > 0)
  df2  <- mutate(df2, newVar = "C=0, I>0")
  df3 <- filter(df, var1 == 0, var2  < 0)
  df3  <- mutate(df3, newVar =  "C=0, I<0")
  df4 <- filter(df, var1 >  0, var2 == 0)
  df4  <- mutate(df4, newVar =  "C>0, I=0")
  df5 <- filter(df, var1 >  0, var2  > 0)
  df5  <- mutate(df5, newVar =  "C>0, I>0")
  df6 <- filter(df, var1 >  0, var2  < 0)
  df6  <- mutate(df6, newVar =  "C>0, I<0")
  df7 <- filter(df, var1 <  0, var2 == 0)
  df7  <- mutate(df7, newVar =  "C<0, I=0")
  df8 <- filter(df, var1 <  0, var2 >  0)
  df8  <- mutate(df8, newVar =  "C<0, I>0")
  df9 <- filter(df, var1 <  0, var2 <  0)
  df9  <- mutate(df9, newVar =  "C<0, I<0")
   df <- rbind(df1, df2, df3, df4, df5, df6, df7, df8, df9)
   return(df)
    }

    #Using test FUN on example data frame
    test_df.afterFUN <- test_fun(test_df, var1 = "V1", var2 = "V2", newVar = "category")

    # Procedure outside of funcion fitted to test_df
    df1 <- filter(test_df, V1 == 0, V2 == 0)
    df1  <- mutate(df1, newVar = "C=0, I=0")
    df2 <- filter(test_df, V1 == 0, V2  > 0)
    df2  <- mutate(df2, newVar = "C=0, I>0")
    df3 <- filter(test_df, V1 == 0, V2  < 0)
    df3  <- mutate(df3, newVar =  "C=0, I<0")
    df4 <- filter(test_df, V1 >  0, V2 == 0)
    df4  <- mutate(df4, newVar =  "C>0, I=0")
    df5 <- filter(test_df, V1 >  0, V2  > 0)
    df5  <- mutate(df5, newVar =  "C>0, I>0")
    df6 <- filter(test_df, V1 >  0, V2  < 0)
    df6  <- mutate(df6, newVar =  "C>0, I<0")
    df7 <- filter(test_df, V1 <  0, V2 == 0)
    df7  <- mutate(df7, newVar =  "C<0, I=0")
    df8 <- filter(test_df, V1 <  0, V2 >  0)
    df8  <- mutate(df8, newVar =  "C<0, I>0")
    df9 <- filter(test_df, V1 <  0, V2 <  0)
    df9  <- mutate(df9, newVar =  "C<0, I<0")
    test_df2 <- rbind(df1, df2, df3, df4, df5, df6, df7, df8, df9)
4

1 回答 1

0

这可能写得更好,但试试:

test_fun <- function(df,col1, col2, newVar) {
    temp <- sapply(df[,c(col1,col2)],function(x) revalue(factor(sign(x)),c("-1"="<0","0"="=0","1"=">0")))
    df[,newVar] <- apply(temp, 1, function(y) paste0(col1,y[1],", ",col2,y[2]))
    df
}

head(test_fun(test_df,"V1", "V2", "category"))
#   V1  V2   category
# 1 -5 -20 V1<0, V2<0
# 2 -4 -19 V1<0, V2<0
# 3 -3 -18 V1<0, V2<0
# 4 -2 -17 V1<0, V2<0
# 5 -1 -16 V1<0, V2<0
# 6  0 -15 V1=0, V2<0

解释

我们用于sign获取列中每个数字的符号(返回 -1、0 或 1)。然后,我们将这些数字重写为字符串“<0”、“=0”和“>0”,使用revalue(factor),c()). 我们sapply用来将它应用于test_df. 这将返回一个字符矩阵。然后我们应用paste到每一行以获得你想要的字符向量。最后,我们将该向量分配给test_df$category

于 2016-02-29T12:33:07.870 回答