我有一系列足球比赛结果,并希望了解一支球队在特定数量的比赛中得了多少分
这是一个子集的头部,其中包含自最新结果以来在一个赛季中得分的累积分数
我因不使用 dput 而被打了几次手腕,所以请忍耐
allData <- structure(list(team = c("Arsenal", "Tottenham H", "Tottenham H",
"Arsenal", "Arsenal", "Tottenham H"), venue = c("H", "A", "H",
"A", "H", "A"), result = c("W", "D", "W", "L", "W", "D"), GF = c(1L,
0L, 3L, 1L, 3L, 0L), GA = c(0L, 0L, 1L, 2L, 0L, 0L), gameDate = structure(c(1333868400,
1333782000, 1333263600, 1333177200, 1332572400, 1332572400), class = c("POSIXct",
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("2010/2011", "2011/2012"), class = "factor"),
points = c(3, 1, 3, 0, 3, 1), GD = c(1L, 0L, 2L, -1L, 3L,
0L), cumpts = c(3, 1, 4, 3, 6, 5)), .Names = c("team", "venue",
"result", "GF", "GA", "gameDate", "season", "points", "GD", "cumpts"
), row.names = c(NA, 6L), class = "data.frame")
这是一支球队在一个赛季中的数据
spurs <- structure(list(team = c("Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H"),
venue = c("A", "H", "A", "H", "A", "H", "A", "H", "A", "H",
"A", "H", "H", "H", "A", "A", "H", "H", "A", "H", "A", "H",
"A", "H", "A", "A", "H", "A", "H", "A", "H", "A"), result = c("D",
"W", "D", "D", "L", "L", "L", "W", "D", "W", "L", "D", "W",
"W", "D", "W", "D", "W", "L", "W", "W", "W", "W", "W", "W",
"D", "W", "W", "W", "W", "L", "L"), GF = c(0L, 3L, 0L, 1L,
0L, 1L, 2L, 5L, 0L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 1L, 0L), GA = c(0L,
1L, 0L, 1L, 1L, 3L, 5L, 0L, 0L, 1L, 3L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 2L, 0L, 1L, 0L, 1L, 1L, 1L, 2L, 1L, 1L, 0L, 0L, 5L,
3L), gameDate = structure(c(1333782000, 1333263600, 1332572400,
1332313200, 1331366400, 1330848000, 1330243200, 1328947200,
1328515200, 1327996800, 1327219200, 1326528000, 1326268800,
1325577600, 1325318400, 1324972800, 1324540800, 1324281600,
1323590400, 1322899200, 1322294400, 1321862400, 1320562800,
1319958000, 1319353200, 1318748400, 1317538800, 1316847600,
1316329200, 1315638000, 1314514800, 1313996400), class = c("POSIXct",
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2010/2011",
"2011/2012"), class = "factor"), points = c(1, 3, 1, 1, 0,
0, 0, 3, 1, 3, 0, 1, 3, 3, 1, 3, 1, 3, 0, 3, 3, 3, 3, 3,
3, 1, 3, 3, 3, 3, 0, 0), GD = c(0L, 2L, 0L, 0L, -1L, -2L,
-3L, 5L, 0L, 2L, -1L, 0L, 2L, 1L, 0L, 2L, 0L, 1L, -1L, 3L,
2L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 4L, 2L, -4L, -3L), cumpts = c(1,
4, 5, 6, 6, 6, 6, 9, 10, 13, 13, 14, 17, 20, 21, 24, 25,
28, 28, 31, 34, 37, 40, 43, 46, 47, 50, 53, 56, 59, 59, 59
)), .Names = c("team", "venue", "result", "GF", "GA", "gameDate",
"season", "points", "GD", "cumpts"), row.names = c(NA, -32L), class = "data.frame")
然后我在马刺数据框上有这个代码来计算在特定游戏长度中得分的分数(这里是 5)
gameLength <- 5
seasonLength <- nrow(spurs)
cumPoints <- c()
cumPoints[1] <- spurs[gameLength,]$cumpts
for (i in gameLength+1:seasonLength) {
cumPoints[i-(gameLength-1)] <- ((spurs[i,]$cumpts)-
(spurs[i-gameLength,]$cumpts))
}
cumPoints <- cumPoints[!is.na(cumPoints)] # not sure why throws up NAs
这会产生正确的输出
[1] 6 5 2 4 4 7 7 8 8 10 8 11 11 11 8 10 10 12 12 15 15
[22] 13 13 13 13 13 12 9
但我需要能够使用包含数据框中每个赛季和球队的数据的列来转换 allData。
我假设我应该以某种方式使用 ddply ,除非有更好的选择