0

我有一个数据框,想对它取子集,使其只保留(数据库中尚不存在的)最新数据。这样做是为了能够一次性把整个数据框(df)插入数据库(db)。我的数据框如下所示:

# Reproducible dump of the sample data frame `pp`: the dput() call followed by
# the structure() expression it printed.
# The frame has 20 rows and 5 columns:
#   DATE_TIME      POSIXct timestamps (stored as seconds since the epoch)
#   SITE           character, "Home Page Login" in every row
#   RESPONSE_TIME  character, not numeric; row 14 holds "-" instead of a number
#   AVAIL_PERCENT  character, not numeric; row 14 holds "-" instead of a number
#   AGENT          numeric, 42627 in every row
dput(pp)
structure(list(DATE_TIME = structure(c(1369320180, 1369321980, 
1369323780, 1369325580, 1369327380, 1369285980, 1369287780, 1369289580, 
1369291380, 1369293180, 1369294980, 1369296780, 1369298580, 1369300380, 
1369302180, 1369303980, 1369305780, 1369307580, 1369309380, 1369311180
), class = c("POSIXct", "POSIXt"), tzone = ""), SITE = c("Home Page Login", 
"Home Page Login", "Home Page Login", "Home Page Login", "Home Page Login", 
"Home Page Login", "Home Page Login", "Home Page Login", "Home Page Login", 
"Home Page Login", "Home Page Login", "Home Page Login", "Home Page Login", 
"Home Page Login", "Home Page Login", "Home Page Login", "Home Page Login", 
"Home Page Login", "Home Page Login", "Home Page Login"), RESPONSE_TIME = c("0.728", 
"0.513", "0.308", "0.432", "0.877", "0.541", "0.736", "0.333", 
"0.354", "0.279", "0.178", "0.699", "0.583", "-", "0.372", "0.972", 
"1.066", "0.550", "0.246", "0.299"), AVAIL_PERCENT = c("100.00", 
"100.00", "100.00", "100.00", "100.00", "100.00", "100.00", "100.00", 
"100.00", "100.00", "100.00", "100.00", "100.00", "-", "100.00", 
"100.00", "100.00", "100.00", "100.00", "100.00"), AGENT = c(42627, 
42627, 42627, 42627, 42627, 42627, 42627, 42627, 42627, 42627, 
42627, 42627, 42627, 42627, 42627, 42627, 42627, 42627, 42627, 
42627)), .Names = c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT", 
"AGENT"), row.names = c(NA, 20L), class = "data.frame")

我写了一个函数,它以数据框作为参数,每次针对一个代理(AGENT)和一个站点(SITE)遍历数据框:先到数据库中查询该组合最新的时间戳,再据此对数据框取子集。最终得到的数据框将被插入数据库。我的函数如下:

# Filter data frame `x` down to the rows that are newer than the latest
# DATE_TIME already stored in the database, looking the cut-off up once per
# AGENT / SITE combination.
#
# x: data frame with the columns DATE_TIME, SITE and AGENT (plus
#    RESPONSE_TIME and AVAIL_PERCENT in the sample data).
#
# NOTE(review): as written, the return() call sits inside the inner loop, so
# the function exits after the very first agent/site pair has been processed.
# odbcConnect / sqlQuery / odbcClose are presumably the RODBC functions; the
# package is not loaded in the visible code.
normal<-function(x) {
    # Drop exact duplicate rows, then collect the distinct sites and agents.
    x<-unique(x)
    pagelist<-unique(x$SITE)
    agentlist<-unique(x$AGENT)
    # Zero-row accumulator that the per-pair results are bound onto.
    latest<-data.frame(DATE_TIME=as.POSIXct(character()), SITE=character(), RESPONSE_TIME=as.numeric(character()), AVAIL_PERCENT=as.numeric(character()), AGENT=as.numeric(character()))
for(j in 1:length(agentlist)){  
    # Rows belonging to the current agent.
    new<-subset(x, AGENT==agentlist[j])
    for(i in 1:length(pagelist)) {

        # Build "... where T.SITE='<site>'": paste() with sep="'" inserts the
        # opening quote, and the \Z (end-of-string) substitution appends the
        # closing quote.
        sql1<-c("SELECT max(T.DATE_TIME) FROM <TABLE NAME> T where T.SITE=")
        sql2<-pagelist[i]
        sql<-paste(sql1, sql2, sep="'")
        sql<-gsub("\\Z","'", sql, perl=T)

        # Append " and T.AGENT='<agent>'" using the same quoting trick.
        sql<-paste(sql, "and T.AGENT=", sep=" ")
        sql<-paste(sql, agentlist[j], sep="'")
        sql<-gsub("\\Z","'", sql, perl=T)
        print(sql)
        # A connection is opened and closed for every agent/site pair.
        ch=odbcConnect("<userid>",pwd = "<passwd>")
        latest_date<-sqlQuery(ch, sql)
        odbcClose(ch)
        # The query result is indexed as a table; its single cell is the
        # max(DATE_TIME) value.
        latest_date<-latest_date[1,1]
        # Keep only this site's rows that are newer than the stored maximum.
        new1<-subset(new, SITE==pagelist[i] & DATE_TIME > latest_date)
        # New rows are prepended to the accumulated result.
        latest<-rbind(new1, latest)
        print(head(latest,5))
        # Returning here ends the function during the first iteration.
        return(latest)
    }
}
}

当我这样做时:

df<-normal(pp)

结果中只有一个站点和一个代理的数据,而最终的 df 应该包含每个站点、每个代理的数据。请问我这里哪里做错了?

4

1 回答 1

1

需要把 return 语句移到两层循环之外;否则函数在处理完第一个代理/站点组合后就会立即返回,后面的组合根本不会被处理。

# Keep only the rows of `x` that are newer than what the database already
# holds, decided separately for every AGENT / SITE combination.
#
# Args:
#   x: data frame with at least the columns DATE_TIME (POSIXct), SITE and
#      AGENT. Exact duplicate rows are removed first.
#
# Returns:
#   A data frame with the columns of `x` containing every row whose DATE_TIME
#   is later than max(DATE_TIME) stored for the same SITE and AGENT. When the
#   database has no row for a combination (the maximum comes back as NA), all
#   rows of that combination are kept. A zero-row frame is returned for
#   zero-row input.
#
# Raises:
#   An error when the ODBC connection cannot be opened or a query fails.
normal <- function(x) {
  x <- unique(x)
  pagelist <- unique(x$SITE)
  agentlist <- unique(x$AGENT)

  # Zero-row template; it is what comes back when there is nothing to process.
  latest <- data.frame(
    DATE_TIME = as.POSIXct(character()),
    SITE = character(),
    RESPONSE_TIME = as.numeric(character()),
    AVAIL_PERCENT = as.numeric(character()),
    AGENT = as.numeric(character())
  )
  if (nrow(x) == 0L) {
    # Nothing to compare, so do not touch the database at all.
    return(latest)
  }

  # Wrap a value in single quotes, doubling embedded quotes so that a site
  # name such as "O'Brien" cannot break (or inject into) the statement.
  sql_quote <- function(value) {
    paste0("'", gsub("'", "''", as.character(value), fixed = TRUE), "'")
  }

  # One connection for the whole run; on.exit() closes it even if a query
  # fails half-way through.
  ch <- odbcConnect("<userid>", pwd = "<passwd>")
  if (!inherits(ch, "RODBC")) {
    stop("could not open the ODBC connection", call. = FALSE)
  }
  on.exit(odbcClose(ch), add = TRUE)

  pieces <- vector("list", length(agentlist) * length(pagelist))
  k <- 0L
  for (j in seq_along(agentlist)) {
    new <- x[which(x$AGENT == agentlist[j]), , drop = FALSE]
    for (i in seq_along(pagelist)) {
      sql <- paste0(
        "SELECT max(T.DATE_TIME) FROM <TABLE NAME> T where T.SITE=",
        sql_quote(pagelist[i]),
        " and T.AGENT=",
        sql_quote(agentlist[j])
      )
      print(sql)

      res <- sqlQuery(ch, sql)
      if (!is.data.frame(res)) {
        # On failure sqlQuery() hands back the error text instead of a table.
        stop("query failed: ", paste(res, collapse = "\n"), call. = FALSE)
      }
      latest_date <- res[1, 1]

      keep <- new$SITE == pagelist[i]
      if (!is.na(latest_date)) {
        keep <- keep & new$DATE_TIME > latest_date
      }
      # is.na(latest_date) means the table has no row for this pair yet, so
      # every row of the pair is new. which() drops NA comparisons.
      k <- k + 1L
      pieces[[k]] <- new[which(keep), , drop = FALSE]
    }
  }

  # Bind once at the end. rev() reproduces the order of the former
  # rbind(new1, latest) accumulation, which put the most recent pair first.
  do.call(rbind, c(rev(pieces), list(latest)))
}
于 2013-05-28T16:29:02.083 回答