-1
 Id         authId          sessionId                         
139 "56763313.wrpy" "4233a31b52f92c6fe8af4f04f2116657"
123 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
126 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
144 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
143 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
118 NA              "ae04ddacadaa3429ca77dab674a008bf"
121 NA              "ae04ddacadaa3429ca77dab674a008bf"
122 NA              "ae04ddacadaa3429ca77dab674a008bf"
75  "5676614888888" "ca673b5e60a6f70963bf3017e3cb0780"
276 "56711325.cc79" "f6075188c0f479d7a423744f6c8655b3"
256 "56711325.cc79" "f6075188c0f479d7a423744f6c8655b3"
275 "56711325.cc79" "f6075188c0f479d7a423744f6c8655b3"
152 NA              "f6075188c0f479d7a423744f6c8655b3"
158 NA              "f6075188c0f479d7a423744f6c8655b3"
28  "221124184"     "fc71064548bb35d05293bd67d55f1693"
31  "221124184"     "fc71064548bb35d05293bd67d55f1693"

我想根据sessionId来填充缺失(NA)的authId。我试图在不使用循环的情况下做到这一点。例如:

143 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
118 "221156400"     "ae04ddacadaa3429ca77dab674a008bf"
4

1 回答 1

3

首先创建一个包含authId和sessionId唯一组合的数据框。然后找出authId为NA的那些行所对应的sessionId。最后使用这张唯一组合表查找与每个sessionId相关联的authId。

# Example data: several rows share a sessionId, but some of those rows are
# missing their authId (read in as NA).
df <- read.table(text="Id         authId          sessionId                         
139 56763313.wrpy 4233a31b52f92c6fe8af4f04f2116657
123 221156400     ae04ddacadaa3429ca77dab674a008bf
126 221156400     ae04ddacadaa3429ca77dab674a008bf
144 221156400     ae04ddacadaa3429ca77dab674a008bf
143 221156400     ae04ddacadaa3429ca77dab674a008bf
118 NA              ae04ddacadaa3429ca77dab674a008bf
121 NA              ae04ddacadaa3429ca77dab674a008bf
122 NA              ae04ddacadaa3429ca77dab674a008bf
75  5676614888888 ca673b5e60a6f70963bf3017e3cb0780
276 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
256 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
275 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
152 NA              f6075188c0f479d7a423744f6c8655b3
158 NA              f6075188c0f479d7a423744f6c8655b3
28  221124184     fc71064548bb35d05293bd67d55f1693
31  221124184     fc71064548bb35d05293bd67d55f1693", header = TRUE)


# Lookup table: one row per distinct (authId, sessionId) pair, dropping the
# pairs whose authId is NA so they can never be chosen as a replacement.
uniques <- unique(df[c("authId", "sessionId")])
uniques <- uniques[!is.na(uniques$authId), ]

# Replace each NA authId with the authId recorded for the same sessionId.
# match() gives the first matching row of the lookup table, or NA when the
# session has no known authId, in which case the value simply stays NA.
na.authId <- which(is.na(df$authId))
na.sessionId <- df$sessionId[na.authId]
df$authId[na.authId] <- uniques$authId[match(na.sessionId, uniques$sessionId)]


#     Id        authId                        sessionId
# 1  139 56763313.wrpy 4233a31b52f92c6fe8af4f04f2116657
# 2  123     221156400 ae04ddacadaa3429ca77dab674a008bf
# 3  126     221156400 ae04ddacadaa3429ca77dab674a008bf
# 4  144     221156400 ae04ddacadaa3429ca77dab674a008bf
# 5  143     221156400 ae04ddacadaa3429ca77dab674a008bf
# 6  118     221156400 ae04ddacadaa3429ca77dab674a008bf
# 7  121     221156400 ae04ddacadaa3429ca77dab674a008bf
# 8  122     221156400 ae04ddacadaa3429ca77dab674a008bf
# 9   75 5676614888888 ca673b5e60a6f70963bf3017e3cb0780
# 10 276 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
# 11 256 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
# 12 275 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
# 13 152 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
# 14 158 56711325.cc79 f6075188c0f479d7a423744f6c8655b3
# 15  28     221124184 fc71064548bb35d05293bd67d55f1693
# 16  31     221124184 fc71064548bb35d05293bd67d55f1693
于 2012-08-24T23:15:07.890 回答