3

我被要求让不同的问题更清楚,所以它们在顶部:

  • 如何计算团队的滚动平均值,不包括当前周
  • 如何为对手球队添加包含相似统计数据的列

以下是原文:

我正在学习 R 来对运动进行一些扶手椅分析。现在,我遇到了一个问题,我有一个 NFL 赛季中每场比赛的列表,我正在尝试计算在这场比赛之前的几周内进攻的 AvgTotalYds 是多少。最终,我希望能够计算出本赛季至今的平均值,以及过去 X 个时期的移动平均值。

更复杂的是,我还想获得有关对手前一周的相同信息。我已经搜索了很多类似的问题,但找不到任何解决方案。

以下是数据样本。我拿到的数据库中有些列名起得不太好:ScoreOff 实际上是指 TeamName 字段中那支球队的总得分,无论得分来自进攻组、防守组还是特勤组。同样,*Def 指的是对手的数据。代码示例使用名为“df2”的数据框。

dput(head(df2))

structure(list(Date = structure(c(14126, 14126, 14129, 14129, 
14129, 14129), class = "Date"), TeamName = structure(c(21L, 32L, 
1L, 2L, 3L, 4L), .Label = c("Arizona Cardinals", "Atlanta Falcons", 
"Baltimore Ravens", "Buffalo Bills", "Carolina Panthers", "Chicago Bears", 
"Cincinnati Bengals", "Cleveland Browns", "Dallas Cowboys", "Denver Broncos", 
"Detroit Lions", "Green Bay Packers", "Houston Texans", "Indianapolis Colts", 
"Jacksonville Jaguars", "Kansas City Chiefs", "Miami Dolphins", 
"Minnesota Vikings", "New England Patriots", "New Orleans Saints", 
"New York Giants", "New York Jets", "Oakland Raiders", "Philadelphia Eagles", 
"Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers", 
"Seattle Seahawks", "St Louis Rams", "Tampa Bay Buccaneers", 
"Tennessee Titans", "Washington Redskins"), class = "factor"), 
    ScoreOff = c(16L, 7L, 23L, 34L, 17L, 34L), FirstDownOff = c(21L, 
    11L, 18L, 23L, 21L, 13L), ThirdDownPctOff = structure(c(34L, 
    14L, 20L, 21L, 35L, 16L), .Label = c("0%", "10%", "11%", 
    "12%", "13%", "14%", "15%", "17%", "18%", "19%", "20%", "21%", 
    "22%", "23%", "24%", "25%", "27%", "29%", "30%", "31%", "33%", 
    "35%", "36%", "37%", "38%", "40%", "41%", "42%", "43%", "44%", 
    "45%", "46%", "47%", "50%", "53%", "54%", "55%", "56%", "57%", 
    "58%", "59%", "60%", "61%", "62%", "63%", "64%", "65%", "67%", 
    "69%", "73%", "77%", "8%", "80%", "9%", "92%"), class = "factor"), 
    RushAttOff = c(32L, 24L, 39L, 42L, 46L, 29L), RushYdsOff = c(154L, 
    84L, 109L, 318L, 229L, 106L), PassAttOff = c(35L, 27L, 30L, 
    13L, 29L, 31L), PassCompOff = c(19L, 15L, 19L, 9L, 15L, 20L
    ), PassYdsOff = c(216L, 133L, 197L, 161L, 129L, 234L), PassIntOff = c(1L, 
    0L, 0L, 0L, 0L, 0L), FumblesOff = c(0L, 0L, 0L, 0L, 2L, 0L
    ), SackYdsOff = c(16L, 8L, 21L, 5L, 0L, 2L), PenYdsOff = c(70L, 
    35L, 40L, 68L, 64L, 14L), TimePossOff = structure(c(348L, 
    52L, 368L, 175L, 354L, 239L), .Label = c("14:45", "18:15", 
    "18:27", "19:31", "19:56", "20:11", "20:12", "20:26", "20:48", 
    "21:03", "21:08", "21:16", "21:26", "21:28", "21:35", "21:44", 
    "21:45", "21:52", "21:54", "22:03", "22:08", "22:12", "22:16", 
    "22:25", "22:30", "22:31", "22:33", "22:34", "22:38", "22:39", 
    "22:53", "22:55", "22:59", "23:09", "23:10", "23:12", "23:15", 
    "23:23", "23:28", "23:30", "23:33", "23:37", "23:38", "23:42", 
    "23:43", "23:45", "23:48", "23:49", "23:56", "24:06", "24:13", 
    "24:17", "24:18", "24:21", "24:33", "24:34", "24:35", "24:41", 
    "24:43", "24:49", "24:50", "24:54", "24:58", "24:59", "25:01", 
    "25:02", "25:05", "25:11", "25:14", "25:16", "25:19", "25:25", 
    "25:29", "25:31", "25:32", "25:34", "25:36", "25:37", "25:38", 
    "25:40", "25:41", "25:46", "25:47", "25:53", "25:55", "25:57", 
    "25:58", "26:00", "26:04", "26:09", "26:10", "26:11", "26:12", 
    "26:13", "26:16", "26:20", "26:27", "26:32", "26:36", "26:37", 
    "26:38", "26:39", "26:40", "26:41", "26:44", "26:46", "26:49", 
    "26:53", "26:56", "26:59", "27:01", "27:04", "27:10", "27:12", 
    "27:13", "27:15", "27:18", "27:20", "27:24", "27:25", "27:26", 
    "27:27", "27:28", "27:30", "27:32", "27:37", "27:40", "27:44", 
    "27:46", "27:47", "27:48", "27:50", "27:51", "27:52", "27:53", 
    "27:55", "27:57", "27:58", "27:59", "28:00", "28:01", "28:03", 
    "28:05", "28:06", "28:07", "28:13", "28:14", "28:16", "28:17", 
    "28:18", "28:19", "28:21", "28:22", "28:24", "28:25", "28:28", 
    "28:29", "28:32", "28:38", "28:40", "28:41", "28:45", "28:47", 
    "28:49", "28:51", "28:53", "28:55", "28:57", "28:58", "28:59", 
    "29:00", "29:02", "29:04", "29:05", "29:07", "29:08", "29:11", 
    "29:13", "29:14", "29:18", "29:19", "29:20", "29:26", "29:27", 
    "29:29", "29:31", "29:32", "29:33", "29:34", "29:36", "29:37", 
    "29:38", "29:41", "29:42", "29:43", "29:49", "29:50", "29:55", 
    "29:56", "29:59", "30:01", "30:04", "30:05", "30:10", "30:11", 
    "30:17", "30:18", "30:19", "30:22", "30:23", "30:24", "30:26", 
    "30:27", "30:28", "30:29", "30:31", "30:33", "30:34", "30:40", 
    "30:41", "30:42", "30:46", "30:47", "30:49", "30:52", "30:53", 
    "30:55", "30:58", "31:00", "31:01", "31:02", "31:03", "31:05", 
    "31:07", "31:09", "31:11", "31:13", "31:15", "31:19", "31:20", 
    "31:22", "31:28", "31:31", "31:32", "31:35", "31:36", "31:38", 
    "31:39", "31:41", "31:42", "31:43", "31:44", "31:46", "31:47", 
    "31:53", "31:54", "31:55", "31:57", "31:59", "32:00", "32:01", 
    "32:02", "32:03", "32:05", "32:07", "32:08", "32:09", "32:10", 
    "32:12", "32:13", "32:14", "32:16", "32:20", "32:23", "32:28", 
    "32:30", "32:32", "32:33", "32:34", "32:35", "32:36", "32:40", 
    "32:42", "32:45", "32:47", "32:48", "32:50", "32:56", "32:59", 
    "33:01", "33:04", "33:07", "33:11", "33:14", "33:16", "33:19", 
    "33:20", "33:21", "33:22", "33:23", "33:24", "33:28", "33:33", 
    "33:40", "33:44", "33:47", "33:48", "33:49", "33:50", "33:51", 
    "33:56", "34:00", "34:02", "34:03", "34:05", "34:07", "34:13", 
    "34:14", "34:19", "34:20", "34:22", "34:23", "34:24", "34:26", 
    "34:28", "34:29", "34:31", "34:35", "34:41", "34:44", "34:46", 
    "34:49", "34:55", "34:58", "34:59", "35:01", "35:02", "35:06", 
    "35:10", "35:11", "35:17", "35:19", "35:25", "35:26", "35:27", 
    "35:39", "35:42", "35:43", "35:47", "35:54", "36:04", "36:11", 
    "36:12", "36:15", "36:17", "36:18", "36:22", "36:23", "36:27", 
    "36:30", "36:32", "36:37", "36:45", "36:48", "36:50", "36:51", 
    "37:01", "37:05", "37:07", "37:21", "37:22", "37:26", "37:27", 
    "37:29", "37:30", "37:35", "37:42", "37:44", "37:48", "37:52", 
    "37:57", "38:08", "38:15", "38:16", "38:23", "38:25", "38:32", 
    "38:34", "38:44", "38:52", "38:57", "39:12", "39:34", "39:48", 
    "39:49", "40:04", "40:29", "41:33", "41:45", "45:15"), class = "factor"), 
    PuntAvgOff = c(36.3, 37.9, 45, 38.3, 48.2, 46.6), Opponent = structure(c(32L, 
    21L, 27L, 11L, 7L, 28L), .Label = c("Arizona Cardinals", 
    "Atlanta Falcons", "Baltimore Ravens", "Buffalo Bills", "Carolina Panthers", 
    "Chicago Bears", "Cincinnati Bengals", "Cleveland Browns", 
    "Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers", 
    "Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars", 
    "Kansas City Chiefs", "Miami Dolphins", "Minnesota Vikings", 
    "New England Patriots", "New Orleans Saints", "New York Giants", 
    "New York Jets", "Oakland Raiders", "Philadelphia Eagles", 
    "Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers", 
    "Seattle Seahawks", "St Louis Rams", "Tampa Bay Buccaneers", 
    "Tennessee Titans", "Washington Redskins"), class = "factor"), 
    ScoreDef = c(7L, 16L, 13L, 21L, 10L, 10L), FirstDownDef = c(11L, 
    21L, 13L, 21L, 8L, 16L), ThirdDownPctDef = structure(c(14L, 
    34L, 25L, 13L, 7L, 10L), .Label = c("0%", "10%", "11%", "12%", 
    "13%", "14%", "15%", "17%", "18%", "19%", "20%", "21%", "22%", 
    "23%", "24%", "25%", "27%", "29%", "30%", "31%", "33%", "35%", 
    "36%", "37%", "38%", "40%", "41%", "42%", "43%", "44%", "45%", 
    "46%", "47%", "50%", "53%", "54%", "55%", "56%", "57%", "58%", 
    "59%", "60%", "61%", "62%", "63%", "64%", "65%", "67%", "69%", 
    "73%", "77%", "8%", "80%", "9%", "92%"), class = "factor"), 
    RushAttDef = c(24L, 32L, 20L, 21L, 23L, 21L), RushYdsDef = c(84L, 
    154L, 108L, 62L, 65L, 85L), PassAttDef = c(27L, 35L, 20L, 
    33L, 25L, 41L), PassCompDef = c(15L, 19L, 14L, 24L, 10L, 
    17L), PassYdsDef = c(133L, 216L, 195L, 262L, 99L, 190L), 
    PassIntDef = c(0L, 1L, 1L, 1L, 1L, 1L), FumblesDef = c(0L, 
    0L, 4L, 0L, 1L, 1L), SackYdsDef = c(8L, 16L, 12L, 16L, 10L, 
    23L), PenYdsDef = c(35L, 70L, 20L, 30L, 40L, 30L), TimePossDef = structure(c(52L, 
    348L, 32L, 225L, 46L, 161L), .Label = c("14:45", "18:15", 
    "18:27", "19:31", "19:56", "20:11", "20:12", "20:26", "20:48", 
    "21:03", "21:08", "21:16", "21:26", "21:28", "21:35", "21:44", 
    "21:45", "21:52", "21:54", "22:03", "22:08", "22:12", "22:16", 
    "22:25", "22:30", "22:31", "22:33", "22:34", "22:38", "22:39", 
    "22:53", "22:55", "22:59", "23:09", "23:10", "23:12", "23:15", 
    "23:23", "23:28", "23:30", "23:33", "23:37", "23:38", "23:42", 
    "23:43", "23:45", "23:48", "23:49", "23:56", "24:06", "24:13", 
    "24:17", "24:18", "24:21", "24:33", "24:34", "24:35", "24:41", 
    "24:43", "24:49", "24:50", "24:54", "24:58", "24:59", "25:01", 
    "25:02", "25:05", "25:11", "25:14", "25:16", "25:19", "25:25", 
    "25:29", "25:31", "25:32", "25:34", "25:36", "25:37", "25:38", 
    "25:40", "25:41", "25:46", "25:47", "25:53", "25:55", "25:57", 
    "25:58", "26:00", "26:04", "26:09", "26:10", "26:11", "26:12", 
    "26:13", "26:16", "26:20", "26:27", "26:32", "26:36", "26:37", 
    "26:38", "26:39", "26:40", "26:41", "26:44", "26:46", "26:49", 
    "26:53", "26:56", "26:59", "27:01", "27:04", "27:10", "27:12", 
    "27:13", "27:15", "27:18", "27:20", "27:24", "27:25", "27:26", 
    "27:27", "27:28", "27:30", "27:32", "27:37", "27:40", "27:44", 
    "27:46", "27:47", "27:48", "27:50", "27:51", "27:52", "27:53", 
    "27:55", "27:57", "27:58", "27:59", "28:00", "28:01", "28:03", 
    "28:05", "28:06", "28:07", "28:13", "28:14", "28:16", "28:17", 
    "28:18", "28:19", "28:21", "28:22", "28:24", "28:25", "28:28", 
    "28:29", "28:32", "28:38", "28:40", "28:41", "28:45", "28:47", 
    "28:49", "28:51", "28:53", "28:55", "28:57", "28:58", "28:59", 
    "29:00", "29:02", "29:05", "29:07", "29:08", "29:11", "29:13", 
    "29:14", "29:18", "29:19", "29:20", "29:26", "29:27", "29:29", 
    "29:31", "29:32", "29:33", "29:34", "29:36", "29:37", "29:38", 
    "29:41", "29:42", "29:43", "29:49", "29:50", "29:55", "29:56", 
    "29:59", "30:01", "30:04", "30:05", "30:10", "30:11", "30:17", 
    "30:18", "30:19", "30:22", "30:23", "30:24", "30:26", "30:27", 
    "30:28", "30:29", "30:31", "30:33", "30:34", "30:40", "30:41", 
    "30:42", "30:46", "30:47", "30:49", "30:52", "30:53", "30:55", 
    "30:56", "30:58", "31:00", "31:01", "31:02", "31:03", "31:05", 
    "31:07", "31:09", "31:11", "31:13", "31:15", "31:19", "31:20", 
    "31:22", "31:28", "31:31", "31:32", "31:35", "31:36", "31:38", 
    "31:39", "31:41", "31:42", "31:43", "31:44", "31:46", "31:47", 
    "31:53", "31:54", "31:55", "31:57", "31:59", "32:00", "32:01", 
    "32:02", "32:03", "32:05", "32:07", "32:08", "32:09", "32:10", 
    "32:12", "32:13", "32:14", "32:16", "32:20", "32:23", "32:28", 
    "32:30", "32:32", "32:33", "32:34", "32:35", "32:36", "32:40", 
    "32:42", "32:45", "32:47", "32:48", "32:50", "32:56", "32:59", 
    "33:01", "33:04", "33:07", "33:11", "33:14", "33:16", "33:19", 
    "33:20", "33:21", "33:22", "33:23", "33:24", "33:28", "33:33", 
    "33:40", "33:44", "33:47", "33:48", "33:49", "33:50", "33:51", 
    "33:56", "34:00", "34:02", "34:03", "34:05", "34:07", "34:13", 
    "34:14", "34:19", "34:20", "34:22", "34:23", "34:24", "34:26", 
    "34:28", "34:29", "34:31", "34:35", "34:41", "34:44", "34:46", 
    "34:49", "34:55", "34:58", "34:59", "35:01", "35:02", "35:06", 
    "35:10", "35:11", "35:17", "35:19", "35:25", "35:26", "35:27", 
    "35:39", "35:42", "35:43", "35:47", "35:54", "36:04", "36:11", 
    "36:12", "36:15", "36:17", "36:18", "36:22", "36:23", "36:27", 
    "36:30", "36:32", "36:37", "36:45", "36:48", "36:50", "36:51", 
    "37:01", "37:05", "37:07", "37:21", "37:22", "37:26", "37:27", 
    "37:29", "37:30", "37:35", "37:42", "37:44", "37:48", "37:52", 
    "37:57", "38:08", "38:15", "38:16", "38:23", "38:25", "38:32", 
    "38:34", "38:44", "38:52", "38:57", "39:12", "39:34", "39:48", 
    "39:49", "40:04", "40:29", "41:33", "41:45", "45:15"), class = "factor"), 
    Site = structure(c(1L, 3L, 3L, 1L, 1L, 1L), .Label = c("H", 
    "N", "V"), class = "factor"), Line = c(4.5, -4.5, 2.5, -3, 
    -2, 1), Totalline = c(41.5, 41.5, 42, 41, 37.5, 38.5), TotalYdsOff = c(370L, 
    217L, 306L, 479L, 358L, 340L), TotalYdsDef = c(217L, 370L, 
    303L, 324L, 164L, 275L), ActualLine = c(-9L, 9L, -10L, -13L, 
    -7L, -24L)), .Names = c("Date", "TeamName", "ScoreOff", "FirstDownOff", 
"ThirdDownPctOff", "RushAttOff", "RushYdsOff", "PassAttOff", 
"PassCompOff", "PassYdsOff", "PassIntOff", "FumblesOff", "SackYdsOff", 
"PenYdsOff", "TimePossOff", "PuntAvgOff", "Opponent", "ScoreDef", 
"FirstDownDef", "ThirdDownPctDef", "RushAttDef", "RushYdsDef", 
"PassAttDef", "PassCompDef", "PassYdsDef", "PassIntDef", "FumblesDef", 
"SackYdsDef", "PenYdsDef", "TimePossDef", "Site", "Line", "Totalline", 
"TotalYdsOff", "TotalYdsDef", "ActualLine"), row.names = c(NA, 
6L), class = "data.frame")

我添加了 TotalYds[Off|Def] 列,因为这很简单。最接近正确计算移动平均值的方法是使用 zoo 和 plyr 库以及以下命令完成的:

# For each TeamName group, compute zoo's rollmean() over TotalYdsOff with a
# window of 4 rows aligned to the right, so the current row is the last
# element of its own window; positions without a full window are filled with 0.
ddply(df2, .(TeamName), summarise, rollmean(TotalYdsOff, k=4, fill=0, align="right"))

几乎可以满足我的要求,只是它将平均使用当前一周的信息。

至于获取对手的对应信息,我想应该有办法从“TeamName”和“Date”分别与当前行的“Opponent”和“Date”相匹配的那一行中提取相同的数据。这是因为数据库对每场比赛都有两个条目,一个对应主队,一个对应客队(并且 *Off 和 *Def 互换)。查看示例数据中的第 1 行和第 2 行,特别是 Date、TeamName 和 Opponent,您就会明白我的意思。

有什么建议吗?我想这对于在 R 中摸索了不止几天的人来说相对简单,他们会知道某个函数或库可以做到这一点。然而,我才学了几天,因此遇到了一些麻烦。

4

2 回答 2

2

解决问题 1 的一种简单方法是使用您描述的 ddply 调用,但要传入一个已删除本周所有比赛的数据框:

# plyr provides ddply(); zoo provides rollmean(). library() stops with an
# error when a package is missing, whereas require() only returns FALSE and
# lets the script fail later with a less obvious message.
library(plyr)
library(zoo)

# Per team, drop the rows that fall on that team's most recent game date.
dfRedacted <- ddply(df2, .(TeamName), function(x) subset(x, Date != max(Date)))

# Per team, right-aligned rolling mean of TotalYdsOff over a window of 4 rows;
# positions without a full window are filled with 0.
meanStats <- ddply(
  dfRedacted, .(TeamName), summarise,
  rollmean(TotalYdsOff, k = 4, fill = 0, align = "right")
)
于 2012-10-17T21:11:09.027 回答
0

目前,我最终创建了一个函数来计算截至(但不包括)给定比赛的赛季平均值,并将结果放入单独的向量中,然后直接使用 cbind() 将其添加到数据框中:

# Collect the per-team results of calc_avg_yds() in factor-level order.
# Mapping over the levels and flattening once avoids re-copying `foo` on
# every iteration, which is what growing it with c() inside a loop does.
foo <- unlist(lapply(
  levels(df$TeamName),
  function(team) calc_avg_yds(df, team)
))

# order() on a factor sorts by level and leaves ties in their existing row
# order, so the reordered rows follow the same team sequence as `foo`.
# NOTE(review): assumes calc_avg_yds() returns one value per row of the given
# team, in the order those rows appear in df -- confirm against its definition.
df <- cbind(df[order(df$TeamName), ], AvgTotalYdsOff = foo)

如您所见,我按球队名称对 df 进行了重新排序(其次按日期,而日期本来就已排好序),以确保两者的顺序一致。

为了从对应的行(同一场比赛中另一支球队的行)获取信息,我写了一个循环并将所有结果放入一个向量中,然后再执行一次 cbind():

# For every row, look up the opponent's AvgTotalYdsOff from the row that
# describes the same game from the other team's side (TeamName equals this
# row's Opponent, on the same Date).
# Start from a fresh, preallocated vector so the result has exactly one entry
# per row; rows with no matching opponent row stay NA.
foo <- rep(NA_real_, nrow(df))
# seq_len() visits every row; iterating over `nrow(df)` itself would run the
# body only once, for the last row.
for (i in seq_len(nrow(df))) {
  # Compare as character so factors with different level sets do not error.
  opp_row <- which(
    as.character(df$TeamName) == as.character(df$Opponent[i]) &
      df$Date == df$Date[i]
  )
  if (length(opp_row) > 0) {
    foo[i] <- df$AvgTotalYdsOff[opp_row[1]]
  }
}
df <- cbind(df, AvgTotalYdsDef = foo)

最后,我选择了简单粗暴的路线,因为我不知道有更好的选择。希望这可以帮助将来遇到类似问题的人。

于 2012-10-18T16:41:25.257 回答