1

我有一系列足球比赛结果,并希望了解一支球队在特定数量的比赛中得了多少分

下面是数据子集的前几行,其中 cumpts 列是每支球队在一个赛季内从最近一场比赛往前累计的积分

我曾因为没有用 dput 贴出数据而被批评过几次,所以这次直接贴出 dput 的输出,请多包涵

# Sample rows of the full results table: one row per team per match.
# Rows are ordered newest match first (gameDate is non-increasing), and cumpts
# is each team's running points total counted from its most recent match
# backwards. season is a factor with levels "2010/2011" and "2011/2012"; every
# row in this sample belongs to "2011/2012".
allData <- structure(list(team = c("Arsenal", "Tottenham H", "Tottenham H", 
"Arsenal", "Arsenal", "Tottenham H"), venue = c("H", "A", "H", 
"A", "H", "A"), result = c("W", "D", "W", "L", "W", "D"), GF = c(1L, 
0L, 3L, 1L, 3L, 0L), GA = c(0L, 0L, 1L, 2L, 0L, 0L), gameDate = structure(c(1333868400, 
1333782000, 1333263600, 1333177200, 1332572400, 1332572400), class = c("POSIXct", 
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("2010/2011", "2011/2012"), class = "factor"), 
 points = c(3, 1, 3, 0, 3, 1), GD = c(1L, 0L, 2L, -1L, 3L, 
0L), cumpts = c(3, 1, 4, 3, 6, 5)), .Names = c("team", "venue", 
"result", "GF", "GA", "gameDate", "season", "points", "GD", "cumpts"
), row.names = c(NA, 6L), class = "data.frame")

这是一支球队在一个赛季中的数据

# One team's matches for a single season: 32 rows, all team "Tottenham H" and
# season "2011/2012", with the same columns as allData.
# Rows are ordered newest match first (gameDate is decreasing), and cumpts is
# the running sum of points down the rows, i.e. cumsum(points).
spurs <- structure(list(team = c("Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H"), 
    venue = c("A", "H", "A", "H", "A", "H", "A", "H", "A", "H", 
    "A", "H", "H", "H", "A", "A", "H", "H", "A", "H", "A", "H", 
    "A", "H", "A", "A", "H", "A", "H", "A", "H", "A"), result = c("D", 
    "W", "D", "D", "L", "L", "L", "W", "D", "W", "L", "D", "W", 
    "W", "D", "W", "D", "W", "L", "W", "W", "W", "W", "W", "W", 
    "D", "W", "W", "W", "W", "L", "L"), GF = c(0L, 3L, 0L, 1L, 
    0L, 1L, 2L, 5L, 0L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
    3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 1L, 0L), GA = c(0L, 
    1L, 0L, 1L, 1L, 3L, 5L, 0L, 0L, 1L, 3L, 1L, 0L, 0L, 1L, 0L, 
    1L, 0L, 2L, 0L, 1L, 0L, 1L, 1L, 1L, 2L, 1L, 1L, 0L, 0L, 5L, 
    3L), gameDate = structure(c(1333782000, 1333263600, 1332572400, 
    1332313200, 1331366400, 1330848000, 1330243200, 1328947200, 
    1328515200, 1327996800, 1327219200, 1326528000, 1326268800, 
    1325577600, 1325318400, 1324972800, 1324540800, 1324281600, 
    1323590400, 1322899200, 1322294400, 1321862400, 1320562800, 
    1319958000, 1319353200, 1318748400, 1317538800, 1316847600, 
    1316329200, 1315638000, 1314514800, 1313996400), class = c("POSIXct", 
    "POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2010/2011", 
    "2011/2012"), class = "factor"), points = c(1, 3, 1, 1, 0, 
    0, 0, 3, 1, 3, 0, 1, 3, 3, 1, 3, 1, 3, 0, 3, 3, 3, 3, 3, 
    3, 1, 3, 3, 3, 3, 0, 0), GD = c(0L, 2L, 0L, 0L, -1L, -2L, 
    -3L, 5L, 0L, 2L, -1L, 0L, 2L, 1L, 0L, 2L, 0L, 1L, -1L, 3L, 
    2L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 4L, 2L, -4L, -3L), cumpts = c(1, 
    4, 5, 6, 6, 6, 6, 9, 10, 13, 13, 14, 17, 20, 21, 24, 25, 
    28, 28, 31, 34, 37, 40, 43, 46, 47, 50, 53, 56, 59, 59, 59
    )), .Names = c("team", "venue", "result", "GF", "GA", "gameDate", 
"season", "points", "GD", "cumpts"), row.names = c(NA, -32L), class = "data.frame")

然后我对 spurs 数据框运行下面的代码,计算每连续若干场比赛(这里是 5 场)所得的积分

# Points won over each run of `gameLength` consecutive matches in `spurs`.
gameLength <- 5
seasonLength <- nrow(spurs)
# cumpts is a running total, so the points for the window ending at row i are
# cumpts[i] - cumpts[i - gameLength]. Prepending 0 lets the first window
# (rows 1..gameLength) use the same formula.
# The earlier loop iterated over `gameLength+1:seasonLength`, which R parses as
# gameLength + (1:seasonLength) because `:` binds tighter than `+`; it ran past
# the last row and produced NAs that then had to be filtered out.
cumPoints <- diff(c(0, spurs$cumpts), lag = gameLength)

这会产生正确的输出

 [1]  6  5  2  4  4  7  7  8  8 10  8 11 11 11  8 10 10 12 12 15 15
[22] 13 13 13 13 13 12  9

但我需要对 allData 做同样的转换,为数据框中的每个赛季和每支球队都计算出这样一列数据。

我假设我应该以某种方式使用 ddply ,除非有更好的选择

4

1 回答 1

1

要复制您的输出:

library(zoo)
# Sum `points` (not GD, the goal difference) over every window of `gameLength`
# consecutive matches; this reproduces 6 5 2 4 4 7 ... from the loop version.
rollapply(spurs$points, gameLength, sum)

如果 allData 的结构与 spurs 数据框相同……

#' Rolling points total over consecutive matches
#'
#' @param df A data frame with a numeric `points` column, one row per match,
#'   already in the order the windows should follow.
#' @param gamelen Window width in matches. Defaults to the global `gameLength`
#'   (the previous default named `gamelength`, which is never defined).
#' @return A numeric vector holding the sum of `points` for each complete
#'   window of `gamelen` consecutive rows.
rollsum <- function(df, gamelen = gameLength) {
  # Namespace-qualified call: fails loudly if zoo is missing, whereas
  # require() only returns FALSE and lets the next line fail obscurely.
  zoo::rollapply(df$points, gamelen, sum)
}

library(plyr)
# Apply rollsum to every season/team combination so that a window never spans
# two seasons or two teams.
ddply(allData, .(season, team), rollsum)
于 2012-04-10T19:19:54.293 回答