1

我有两个这样的数据框:

输入(x)

structure(list(ICTO = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "ICTO-6335", class = "factor"), Application = structure(c(5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("AUS-PSOFT", 
"DBA-GL-ORA-PRD", "JPN-PSOFT", "LDN-PSOFT", "LNBCV_GL", "NYBCV_GL", 
"NYK-PSOFT", "SGBCV_GL", "SNG-PSOFT", "02-PEOPLESOFT", "11-SLR-PROC", 
"AP-CIT-BATCH-STATUS", "FCIT-GARS", "GBL-EXPENSE", "GLAD", "HRDMART-MAINT", 
"MISC-PSOFT", "NYK-LATE", "NYK-WKND", "REP_PSOFT"), class = "factor"), 
    Group = structure(c(58L, 58L, 58L, 58L, 58L, 58L, 58L, 58L, 
    58L, 58L), .Label = c("AUS-AP", "AUS-CHF", "AUS-CHK", "AUS-DATE", 
    "AUS-DE", "AUS-DST", "AUS-ESS", "AUS-GL", "AUS-GLI", "AUS-GLR", 
    "AUS-LATE", "AUS-SL", "AUS-SLI", "AUS-SLR", "AUS-SM", "AUS-SMR", 
    "JPN-AM", "JPN-AP", "JPN-CHF", "JPN-CHK", "JPN-DE", "JPN-GL", 
    "JPN-GLI", "JPN-GLR", "JPN-SL", "JPN-SLI", "JPN-SLR", "LDN-AP", 
    "LDN-CHF", "LDN-ESS", "LDN-GBM", "LDN-GL", "LDN-GL-BUD", 
    "LDN-GL-CPM", "LDN-GL-CPM-FULL", "LDN-GL-EIT", "LDN-GL-ITR", 
    "LDN-GLR", "LDN-PSOFT", "LDN-SMR", "NYK-AM", "NYK-AP", "NYK-BO", 
    "NYK-BRANCH", "NYK-CHF", "NYK-ESS", "NYK-GBM", "NYK-GL", 
    "NYK-GL-BUD", "NYK-GL-BUD-HC", "NYK-GL-FOR", "NYK-GLR", "NYK-SM", 
    "NYK-SMR", "PDCGL06", "PDCGL30", "PNYPSGL1", "RFCS", "SGP-GLR", 
    "SNG-AM", "SNG-AP", "SNG-BOK", "SNG-CHF", "SNG-CHK", "SNG-DE", 
    "SNG-GBM", "SNG-GL", "SNG-GL-BUD", "SNG-GLI", "SNG-GLR", 
    "SNG-MAS", "SNG-SHB", "SNG-SL", "SNG-SLI", "SNG-SLR", "SNG-SM", 
    "SNG-SMR", "TIS", "LNBCV", "NYBCV", "NYK-WKND-MAINT", "RECYCLE_APPSERV", 
    "RECYCLE_WEBSERV", "SGBCV", "02-REP-PEOPLESOFT", "11-001-HOUSEKEEP", 
    "11-001-RCL-CHK", "11-SLR-PROC-AU", "11-SLR-PROC-HK", "11-SLR-PROC-IN", 
    "11-SLR-PROC-INT", "11-SLR-PROC-JL", "11-SLR-PROC-KR", "11-SLR-PROC-SG", 
    "11-SLR-REG-RPT", "AUS", "BREAK-GLASS", "CLOAKWARE", "CONV", 
    "EMAIL-ALERT-MONITOR", "FCIT-GLI-GARS", "GLAD-AUS", "GLAD-LON", 
    "GLAD-NYK", "HKG", "HRDMART-MON", "JPN", "LDN", "LedgerLastFeed", 
    "LON_PEOPLESOFT", "NYK", "NYK-LATE", "RECYCLE_PRCSSKED", 
    "SGP", "SGS60A-080", "SPD", "SYNCH-PROD-DR"), class = "factor"), 
    JobName = c("EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", 
    "EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", 
    "EXBCV06D"), Date = c(120820L, 120817L, 120816L, 120815L, 
    120814L, 120813L, 120810L, 120809L, 120808L, 120807L), Status = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Ended Not OK", 
    "Ended OK", "Executing", "Wait Condition", "Wait Resource"
    ), class = "factor"), StartTime = c(20120821015845, 20120819024725, 
    20120817010722, 20120816010512, 20120815013233, 20120814005343, 
    20120811004005, 20120810004613, 20120809012701, 20120808005116
    ), EndTime = c(20120821015854, 20120819024734, 20120817010733, 
    20120816010521, 20120815013242, 20120814005354, 20120811004015, 
    20120810004623, 20120809012710, 20120808005126), ElapseSecond = c(9L, 
    9L, 11L, 9L, 9L, 11L, 10L, 10L, 9L, 10L)), .Names = c("ICTO", 
"Application", "Group", "JobName", "Date", "Status", "StartTime", 
"EndTime", "ElapseSecond"), row.names = 2689:2698, class = "data.frame")

输入(y)

structure(list(JobName = c("XAPSJCDC0D", "XHPSJCD0HD", "XSPSJCD03D", 
"EXBCV06D", "EXESS120D", "EXGL008D", "EXGL027D", "EXGL028D", 
"EXGL035D", "EXGL042S"), EntryDesc = structure(c(59L, 60L, 61L, 
64L, 53L, 71L, 37L, 70L, 35L, 41L), .Label = c("AFINA FEED", 
"Arrival of All Australia Feeds", "Arrival of All Japan Feeds", 
"Arrival of All Singapore Feeds", "Arrival of Endur Feed", "Basel II Balance Sheet Extract - Pacific", 
"Billing Manager Feed", "BOK Reg Reports Availability", "CD GL Balance Extract J11 AYE to CARAT", 
"CD GL Balance Extract SGP to CARAT", "CD Taiwan GL Extract to SYSTEX", 
"CIF Affiliate Feed", "End of Endur Feed Processing", "End of Spectal BDLite Feed Processing", 
"FTP Carat LCYBS Daily Extract", "FX Shredder Currency upload", 
"GFX FXOps Interface", "GL Balance Extract A48 to CARAT", "GLOBAL MONEY MARKET FEED", 
"Glosub interface", "GMI Feed Load", "Inspire Journal Feed", 
"Intellimatch Feed Sent", "Intellimatch Feed Sent - Australia", 
"Intellimatch Feed Sent - Japan", "Intellimatch Feed Sent - Singapore", 
"Ledger Available - Australia", "Ledger Available - HK/KR/SG", 
"Ledger Available - Japan", "Load GERS Feed", "LOAD GERS FEED", 
"Load of the VATSET Feed file to staging", "Loan IQ feed", "MAS MERP Reports Availability", 
"MONTHLY SUMMARISED JOURNAL FEED", "MyHR feed for HRMS and HR4U (prev. Headcount feed)", 
"NTPA-LOAD TO STAGING USD CCY", "NY NTPA Journal Feed", "OLD WORLD 80 ps_tipsj", 
"OLD WORLD 80 ps_tipzs", "OPC IT - Arrival of GMI Feed", "Opera Exchange Rate Extract - AUS", 
"Opera Exchange Rate Extract - SNG", "PCIT - Arrival of Spectral Feed", 
"Peoplesoft - Basel II Balance Sheet Extract - NY", "Peoplesoft - BDLite Extract", 
"Peoplesoft - End of GMI Feed Processing", "Peoplesoft - End of NTPA GLI Feed Processing", 
"Peoplesoft - FSR fcdb transactions delivered", "Peoplesoft - FSR fclonae delivered", 
"Peoplesoft - FSR gmmbal delivered", "Peoplesoft - FTP Phase II Completion", 
"PeopleSoft - FTP Phase II Completion", "Peoplesoft - FX Rates feed to Opera", 
"Peoplesoft - GL Extract feed for Hong Kong to CARAT", "Peoplesoft - GL Extract feed for India to CARAT", 
"Peoplesoft - GL Extract for Korea Branch to CARAT", "Peoplesoft - NYK Alt YE Close", 
"Poets-GLI Feed to Peoplesoft For AUS", "Poets-GLI Feed to Peoplesoft For HKG", 
"Poets-GLI Feed to Peoplesoft For SNG", "PROCESS INPUT FEED FROM FEPS GE", 
"Project Accounting upload", "Reporting Server Available", "Run application engine to process Endur feed.", 
"SL Period Balance Extract for T15 FTP", "SL YTD Balance Extract for T14 FTP", 
"SL YTD Balance Extract for T15 FTP", "SPECTRAL Feeds", "SPHERE FEED UPLOAD", 
"SUMMIT LOAD TO STAGING", "TPW Sub-ledger extract ftp to CARAT", 
"Peoplesoft - BDLite Extract ", "Peoplesoft - End of GMI Feed Processing "
), class = "factor"), SLAType = structure(c(3L, 3L, 3L, 1L, 3L, 
3L, 3L, 3L, 3L, 2L), .Label = c("DDA", "Milestone", "OLA"), class = "factor"), 
    EntryType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = "Automated", class = "factor"), Active = structure(c(1L, 
    2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("OK", "ON"
    ), class = "factor"), LastRun = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L), .Label = c("2012/08/01", "2012/09/06", 
    " 2012/10/08", " 2012/10/10", " 2012/10/12", " 2012/10/15"
    ), class = "factor"), DataCenter = structure(c(2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("PNYSHCTM07", "PSGSHCTM03"
    ), class = "factor"), ProviderReg = structure(c(2L, 2L, 2L, 
    1L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Americas IT View", 
    "Asia Pacific IT View", "EMEA IT View"), class = "factor"), 
    ProviderDiv = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("RF&CS IT", "BO IT"), class = "factor"), 
    ProviderSubDiv = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L), .Label = c("CFO IT - Product Control (KGK)", 
    "CFO IT – Financial Accounting (KGX)", "CFO IT - Financial Reporting [KGFX]", 
    "CFO IT ? Financial Accounting (KGX)"), class = "factor"), 
    ReceiverReg = structure(c(2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L), .Label = c("Americas Business View", "Asia Pacific Business View", 
    "EMEA Business View"), class = "factor"), ReceiverDiv = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Finance", 
    "Back Office"), class = "factor"), ReceiverSubDiv = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("CFO IT – Financial Accounting (KGX)", 
    "Financial Accounting", "Product Control", "CFO - Financial Reporting", 
    "CFO IT ? Financial Accounting (KGX)"), class = "factor"), 
    Service = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L), .Label = c("Accounting Reporting", "Ledger Processing", 
    "Product Control", "Regional Financial Accounting"), class = "factor"), 
    ICTO = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = "ICTO-6335", class = "factor"), SLAHour = c(4, 
    4, 4, 8.3, 7.3, 3, 3, 3, 4, 4), TargetDate = c(-1L, -1L, 
    -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L)), .Names = c("JobName", 
"EntryDesc", "SLAType", "EntryType", "Active", "LastRun", "DataCenter", 
"ProviderReg", "ProviderDiv", "ProviderSubDiv", "ReceiverReg", 
"ReceiverDiv", "ReceiverSubDiv", "Service", "ICTO", "SLAHour", 
"TargetDate"), row.names = c(NA, 10L), class = "data.frame")

我正在这样做:

    xx<-merge(x, y, all.x=TRUE)

例如,输出如下所示:

head(subset(xx, JobName=="EXBCV06D"),10)
       JobName      ICTO Application Group   Date   Status    StartTime      EndTime ElapseSecond                  EntryDesc SLAType SLAHour TargetDate
35076 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120417 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35077 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120417 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35078 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120417 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35079 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120417 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35080 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120419 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35081 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120419 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35082 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120419 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35083 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120419 Ended OK 2.012042e+13 2.012042e+13            9 Reporting Server Available     DDA     8.3         -1
35084 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120412 Ended OK 2.012041e+13 2.012041e+13            9 Reporting Server Available     DDA     8.3         -1
35085 EXBCV06D ICTO-6335    LNBCV_GL  RFCS 120412 Ended OK 2.012041e+13 2.012041e+13            9 Reporting Server Available     DDA     8.3         -1

我在同一日期看到相同的 JobName 4 次:

JobName  Date   
EXBCV06D 120417 
EXBCV06D 120417 
EXBCV06D 120417 
EXBCV06D 120417

每个 JobName 和 Date 的组合应该只有一行。例如,应该只有

EXBCV06D 120417

不是这四个。

这是正确的合并方法吗?看起来同一个日期下同一个 JobName 出现了多次。我需要每个日期的每个 JobName 只对应一行。

4

1 回答 1

3

使用合并:

xy <- merge(x, y, by='JobName')

但是,这种合并会给您带来意想不到的结果,因为 x 的所有数据中只有一个唯一的 JobName,所以 y 中其余的行会丢失。如果你想保留它们,可以添加 all.y=TRUE,但这样会出现很多日期为 NA 的行。

我建议仔细阅读 ?merge,并创建一个小的示例数据集。该数据集应该能重现您看到的“问题”,同时您还应该提供预期的输出。在不了解这些信息的情况下……很难提供更多帮助。

于 2012-11-14T20:39:42.727 回答