2

我需要有选择地更新 df1 中某一列的值：当 df2 中某一列满足给定条件时，用 df2 中对应列的值来更新 df1。两个 df 都有一个取值唯一的键列，并且 df2 的键值集合是 df1 键值集合的真子集。我尝试的做法是：把两个 df 的键列都设为行名，先根据 df2 构造一个选择索引，再把它应用到 df1 上完成更新。通过用数字下标指定列、用基于字符的共享键行索引指定行，我（终于！）得到了可以工作的写法。呼。

但是，有没有一种比我现在的做法更简单、更高效、更符合 R 习惯的方式，比如使用内置函数？之后我还需要把它扩展到更大的数据上。测试示例如下：

# goo: the source of updated payouts, one row per unique id.
goo <- data.frame(
  Uids = c("UidD", "UidA", "UidC"),
  Payout = c(3, 0, 5),
  stringsAsFactors = FALSE
)
# moo: the table to be updated; PayOut starts at 0 for every id.
moo <- data.frame(
  Uids = c("UidB", "UidC", "UidA", "UidD"),
  PayOut = 0,
  stringsAsFactors = FALSE
)
goo
  Uids Payout
1 UidD      3
2 UidA      0
3 UidC      5
moo
  Uids PayOut
1 UidB      0
2 UidC      0
3 UidA      0
4 UidD      0
# I want to update moo$PayOut with the value of goo$Payout, for matching Uids,
# when goo$Payout > 0, i.e. moo[4,2] <- goo[1,2]; moo[2,2] <- goo[3,2]
# Use the unique ids as row names so rows can be addressed by id.
row.names(goo) <- goo[["Uids"]]
row.names(moo) <- moo[["Uids"]]
# Build the selection index: the ids whose payout in goo is positive.
IndexToUpdate <- goo[["Uids"]][goo[["Payout"]] > 0]
IndexToUpdate
[1] "UidD" "UidC"
 # Rows are selected by row name (the ids in IndexToUpdate); column 2 is the
 # payout column in both data frames.
 moo[IndexToUpdate, 2] <- goo[IndexToUpdate, 2]
#this works, but is there a better way to do it?
moo
     Uids PayOut
UidB UidB      0
UidC UidC      5
UidA UidA      0
UidD UidD      3
4

1 回答 1

3

I would use merge with all.x = TRUE:

# Left join: keep every row of moo and attach goo's Payout where Uids match.
# The value columns are named differently (moo$PayOut vs goo$Payout), so merge
# adds no .x/.y suffixes.
voo <- merge(moo, goo, by = "Uids", all.x = TRUE)
voo
#   Uids PayOut Payout
# 1 UidA      0      0
# 2 UidB      0     NA
# 3 UidC      0      5
# 4 UidD      0      3

Then ifelse:

# Overwrite PayOut with goo's value only where one exists and is positive;
# unmatched rows (NA) and non-positive payouts keep moo's original value.
within(voo, PayOut <- ifelse(!is.na(Payout) & Payout > 0, Payout, PayOut))
#   Uids PayOut Payout
# 1 UidA      0      0
# 2 UidB      0     NA
# 3 UidC      5      5
# 4 UidD      3      3

The same thing using data.tables:

library(data.table)
GOO <- data.table(goo)
MOO <- data.table(moo)
# Key both tables on Uids so that X[Y] performs a keyed join.
setkey(GOO, Uids)
setkey(MOO, Uids)
# Look up each MOO row in GOO; Payout (from GOO) is NA where GOO has no match.
VOO <- GOO[MOO]
# goo's column is `Payout` and moo's is `PayOut`, so the names do not clash and
# the join creates no renamed column such as PayOut.1.
# Take goo's value only when it exists and is positive; otherwise keep moo's.
VOO[, FinalPayout := ifelse(!is.na(Payout) & Payout > 0, Payout, PayOut)]
于 2013-02-10T13:45:57.523 回答