我有一个数据框,其中包含课程 ID、学生 ID、周数(第一周为 1,第二周为 2,……),以及每个学生每周在每门课程中的一些活动信息。如果讲师在某一周对该课程中的学生进行了“干预”,则数据框的最后两列(InterventionID 和 InterventionType)在该行为非 NA,否则为 NA。我想比较每个学生在第一次干预之前和之后的行为。
所以我想创建一个名为“HasIntervened”的列:周数小于该学生第一次干预所在周的那些行取值为 FALSE,周数大于或等于该周的那些行取值为 TRUE。但创建这个看似简单的列却让我费尽了周折。我相当确定 `aggregate` 是正确的方向,但我始终没能用正确的思路来考虑这个问题。
下面是数据框前 60 行的 `dput` 输出(包含 9 名学生的数据):
structure(list(UserID = c(4188948L, 4188948L, 4188948L, 4188948L,
4188948L, 4188948L, 4735684L, 4735684L, 4735684L, 4735684L, 4735684L,
4735684L, 6292486L, 6292486L, 6292486L, 6292486L, 6292486L, 6292486L,
6469671L, 6469671L, 6469671L, 6469671L, 6469671L, 6469671L, 6538263L,
6538263L, 6538263L, 6538263L, 6538263L, 6538263L, 6621258L, 6621258L,
6621258L, 6621258L, 6621258L, 6621258L, 6891869L, 6891869L, 6891869L,
6891869L, 6891869L, 6891869L, 6891869L, 6891869L, 6891869L, 6891869L,
6891869L, 6891869L, 6978155L, 6978155L, 6978155L, 6978155L, 6978155L,
6978155L, 7195846L, 7195846L, 7195846L, 7195846L, 7195846L, 7195846L
), CourseID = c(6567871L, 6567871L, 6567871L, 6567871L, 6567871L,
6567871L, 6567168L, 6567168L, 6567168L, 6567168L, 6567168L, 6567168L,
6567864L, 6567864L, 6567864L, 6567864L, 6567864L, 6567864L, 6567159L,
6567159L, 6567159L, 6567159L, 6567159L, 6567159L, 6567162L, 6567162L,
6567162L, 6567162L, 6567162L, 6567162L, 6567853L, 6567853L, 6567853L,
6567853L, 6567853L, 6567853L, 6567159L, 6567159L, 6567159L, 6567159L,
6567159L, 6567159L, 6567864L, 6567864L, 6567864L, 6567864L, 6567864L,
6567864L, 6567873L, 6567873L, 6567873L, 6567873L, 6567873L, 6567873L,
6567859L, 6567859L, 6567859L, 6567859L, 6567859L, 6567859L),
WeekInCourse = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2,
3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4,
5, 6, 1, 2, 3, 4, 5, 6), WeekPostCount = c(1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 3L, 4L, 3L, 3L, 0L, 4L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 2L,
2L, 0L, 0L, 4L, 0L, 3L, 0L, 3L, 0L, 0L, 0L), WeekLoginCount = c(2L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 4L, 4L, 1L, 0L,
0L, 0L, 3L, 3L, 1L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 4L, 1L, 0L, 0L,
0L, 0L, 3L, 3L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L),
WeekPointsPercent = c(0, 0, 0, 0, 0, 0, 0, 0.185714285714286,
0.375, 0.2, 0, 0, 0, 0.85, 0.7, 0.4, 0.7, 0.7, 0, 0.857142857142857,
0.35, 0, 0, 0.712765957446808, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0, 0, 0, 0, 0.5, 0.5,
0, 0, 0.7, 1, 1, 0.375, 0.723076923076923, 0, 0.738636363636364
), CumulativePointsPercent = c(0, 0, 0, 0, 0, 0, 0, 0.185714285714286,
0.254545454545455, 0.235294117647059, 0.235294117647059,
0.10958904109589, 0, 0.85, 0.8, 0.533333333333333, 0.55,
0.563636363636364, 0, 0.857142857142857, 0.623076923076923,
0.476470588235294, 0.476470588235294, 0.600558659217877,
0, 1, 0.0666666666666667, 0.0666666666666667, 0.0461538461538462,
0.0461538461538462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.25, 0.166666666666667, 0.0555555555555556, 0.05, 0.0454545454545455,
0, 0.5, 0.5, 0.166666666666667, 0.15, 0.2, 1, 1, 0.615384615384615,
0.669230769230769, 0.621428571428571, 0.666666666666667),
RiskEstimate = c(0.627717786405816, 0.986868933315635, 0.986687587608184,
0.993909863003438, 0.997123961252086, 0.995862152216296,
0.914011371723269, 0.925359536086114, 0.902625588346349,
0.956922151061089, 0.977244888475535, 0.975006380719003,
0.215420992232115, 0.174623555825523, 0.241380495376484,
0.699712463799006, 0.692014530298594, 0.697966901130338,
0.765071150059092, 0.763071307309743, 0.767261726128078,
0.835918063362269, 0.854949153314029, 0.805318343915736,
0.792873572656207, 0.790581615380765, 0.82622599277251, 0.9330287497742,
0.965763061363497, 0.951226314109191, 0.851355921713566,
0.991081300877175, 0.989671569185701, 0.995402298000919,
0.997671718747865, 0.996593366142757, 0.738690043138604,
0.865412845144037, 0.831369850200541, 0.93845410260835, 0.968400480533385,
0.9533338828382, 0.624930735381371, 0.981915016747928, 0.985037736895337,
0.994680902796769, 0.996907588471311, 0.995388109404559,
0.887995464972052, 0.970620002831325, 0.97136665697772, 0.992618626388727,
0.99543249839328, 0.992149889176406, 0.923802324633255, 0.984464950934932,
0.978726967214146, 0.971473084822075, 0.97886220009245, 0.979311013989987
), RiskBin = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), InterventionID = c(NA, 26L, NA, NA, NA,
NA, NA, NA, NA, NA, 50L, NA, NA, NA, NA, NA, 73L, NA, NA,
NA, NA, NA, 56L, NA, NA, NA, NA, 46L, NA, NA, NA, 33L, NA,
NA, NA, NA, 15L, NA, NA, 43L, 53L, NA, NA, NA, NA, NA, 71L,
NA, NA, NA, NA, NA, 78L, NA, NA, 36L, NA, NA, 80L, NA), InterventionType = structure(c(NA,
2L, NA, NA, NA, NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA,
2L, NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, 3L, NA, NA, NA,
2L, NA, NA, NA, NA, 3L, NA, NA, 3L, 2L, NA, NA, NA, NA, NA,
2L, NA, NA, NA, NA, NA, 2L, NA, NA, 3L, NA, NA, 3L, NA), .Label = c("",
"At-Risk Form", "Email", "Other", "Phone"), class = "factor")), .Names = c("UserID",
"CourseID", "WeekInCourse", "WeekPostCount", "WeekLoginCount",
"WeekPointsPercent", "CumulativePointsPercent", "RiskEstimate",
"RiskBin", "InterventionID", "InterventionType"), row.names = c(NA,
60L), class = "data.frame")