0

我试图在两个不同数据帧的两列之间找到重复项。在识别出重复项后,我想从重复项所在的同一行但从不同的列中提取观察值,并将其插入到另一个数据框中。让我举个例子:

Table1:
tobecopied   B   Checkfordup   D
Copy1        2   dupchk1       5
Copy2        3   dupchk5       4
Copy3        4   dupchk4       K

Table2:
tobepastedinto   B   Checkfordup   D
                 5   dupchk1       L
                 6   dupchk2       M
                 7   dupchk4       3

所以代码运行后,表二将如下所示:

Updated Table2:

tobepastedinto   B   Checkfordup   D
Copy1            5   dupchk1       L
                 6   dupchk2       M
Copy3            7   dupchk4       3

我试图做的是创建一个函数来执行此操作并在两列中使用 mapply 。代码如下所示:

             # Asker's attempt, kept exactly as posted.
             # checknum(x, y) is meant to act only when the two key values are equal.
             checknum <- function(x,y){
               # NOTE(review): `y=x` is not an equality test; a comparison is `y == x`.
               if(y=x){
                 # gsub() is applied to the whole Table2$tobepastedinto column on
                 # every call, with x as the pattern and y as the replacement.
                 gsub(x,y,Table2$tobepastedinto)
               }
               else{""}
             }
            # mapply() pairs the i-th Table2 key with the i-th Table1 key only, so
            # keys sitting on different rows of the two tables are never compared.
            mapply(checknum,Table2$Checkfordup,Table1$Checkfordup)

该函数在 R 中运行起来要花极长的时间,我很确定我的做法有问题。有没有人对我想做的事情有更好的解决方案?或者有没有更好的方法来使用 mapply?

编辑:这是一个小数据集。NASET 的 Number 列是空的。我想检查 Numberset 中的任何 mobile 值是否与 NASET 中的 mobile 值匹配,如果匹配,就把对应的 Number 填入 NASET,即使 name 不匹配:

 NASET:
 name     Number     mobile
 VAN                 678
 GEORGE              6564
 STEVEN              76787



Numberset:
 name     Number     mobile
 TEU      7          678
 GEGE     6          64
 VEN      5          87
 TETK     7          678

Updated NASET:
NASET:
 name     Number     mobile
 VAN      7          678
 GEORGE              6564
 STEVEN              76787
4

1 回答 1

1

你可以试试

# For every key in df2, match() returns the row of its first occurrence in
# df1 (NA when absent); indexing df1$tobecopied with that pulls the value over.
# The target must be spelled exactly `tobepastedinto`: `$<-` does not
# partial-match, so any other spelling silently adds a new column.
# as.character() keeps the blank-fill below valid if tobecopied is a factor.
df2$tobepastedinto <- as.character(df1$tobecopied)[match(df2$Checkfordup, df1$Checkfordup)]
# Keys with no counterpart in df1 come back as NA; show them as blanks.
df2$tobepastedinto[is.na(df2$tobepastedinto)] <- ''

或者

# Element-wise variant of the match() lookup: each df2 key (x) is searched in
# the full df1 key vector (y) and the corresponding df1 value (z) is returned,
# or '' when the key is absent.
# The lookup vectors are constant across calls, so they go in MoreArgs.
# USE.NAMES = FALSE stops mapply() from naming the result after the keys.
df2$tobepastedinto <- mapply(
  function(x, y, z) {
    indx <- match(x, y)
    # x is a single key here, so a plain if/else is enough; as.character()
    # keeps the result a character vector even if z is a factor.
    if (is.na(indx)) '' else as.character(z[indx])
  },
  df2$Checkfordup,
  MoreArgs = list(y = df1$Checkfordup, z = df1$tobecopied),
  USE.NAMES = FALSE
)

更新

  # Fetch, for each NASET mobile, the Number of the first Numberset row with
  # the same mobile, then blank out every lookup that came back NA.
  NASET$Number <- local({
    found <- Numberset$Number[match(NASET$mobile, Numberset$mobile)]
    replace(found, is.na(found), '')
  })
  NASET
  #    name Number mobile
  #1    VAN      7    678
  #2 GEORGE          6564
  #3 STEVEN         76787

或者

  # Per-element lookup: each NASET mobile is searched in the whole
  # Numberset$mobile vector; the matching Number is returned, or '' if none.
  # The two lookup vectors are wrapped in list() so mapply() hands them over
  # whole on every call instead of iterating over their elements.
  NASET$Number <- mapply(
    function(key, pool, payload) {
      pos <- match(key, pool)
      ifelse(is.na(pos), '', payload[pos])
    },
    NASET$mobile, list(Numberset$mobile), list(Numberset$Number)
  )

或者

  library(dplyr)
  # x: NASET without its second (Number) column.
  # y: the distinct (Number, mobile) pairs from Numberset, so repeated
  #    identical pairs do not duplicate rows in the join result.
  left_join(
    x = NASET[, -2],
    y = unique(Numberset[2:3]),
    by = 'mobile'
  )
  #   mobile   name Number
  #1    678    VAN      7
  #2   6564 GEORGE     NA
  #3  76787 STEVEN     NA

数据

# Source table: `tobecopied` holds the values to transfer and `Checkfordup`
# is the key they are looked up by.
df1 <- data.frame(
  tobecopied = c("Copy1", "Copy2", "Copy3"),
  B = 2:4,
  Checkfordup = c("dupchk1", "dupchk5", "dupchk4"),
  D = c("5", "4", "K"),
  stringsAsFactors = FALSE
)

 # Target table: `tobepastedinto` starts blank and is filled by looking up
 # `Checkfordup` in df1. Two rows share the key "dupchk4".
 df2 <- data.frame(
   tobepastedinto = c("", "", "", ""),
   B = 5:8,
   Checkfordup = c("dupchk1", "dupchk2", "dupchk4", "dupchk4"),
   D = c("L", "M", "3", "5"),
   stringsAsFactors = FALSE
 )

新数据

  # Table to be completed: `Number` is all NA until it is filled in by
  # matching on `mobile`.
  NASET <- data.frame(
    name = c("VAN", "GEORGE", "STEVEN"),
    Number = c(NA, NA, NA),
    mobile = c(678L, 6564L, 76787L),
    stringsAsFactors = FALSE
  )

 # Lookup table supplying `Number` for a given `mobile`; mobile 678 appears
 # twice, both times with Number 7.
 Numberset <- data.frame(
   name = c("TEU", "GEGE", "VEN", "TETK"),
   Number = c(7L, 6L, 5L, 7L),
   mobile = c(678L, 64L, 87L, 678L),
   stringsAsFactors = FALSE
 )
于 2014-12-22T04:32:12.170 回答