I have a Data Frame that contains, between other things, the following fields: userX, Time1, Time2, Time3. The number of observations is 2000.
I have a function that has as inputs userX, Time1, Time2, Time3 and return a data frame with 1 observation and 19 variables.
I want to apply that function to all the observations of the first data frame to make a new data frame with 2000 observations and 19 variables.
I thought about using lapply, but if I understand correctly, it only takes one variable.
Can somebody point me in the right direction?
This is the code I have right now:
# Make Data Frame for video actions between given times for user X
DataVideoActionT <- function (userX, Time1, Time2, Time3){
#Get data for user X
videoActionsX<-subset(videoLectureActions, username==userX)
#Time1 = before first attempt
videoActionsX_T1<-subset(videoActionsX, eventTimestamp<Time1)
#Time2 = before best attemp
videoActionsX_T2<-subset(videoActionsX, eventTimestamp<Time2 & eventTimestamp>Time1)
#Time3= before last attemp
videoActionsX_T3<-subset(videoActionsX, eventTimestamp<Time3 & eventTimestamp>Time1)
error1 = sum(videoActionsX_T1$type==" error ")
pause1 = sum(videoActionsX_T1$type==" pause ")
play1 = sum(videoActionsX_T1$type==" play ")
ratechange1 = sum(videoActionsX_T1$type==" ratechange ")
seeked1 = sum(videoActionsX_T1$type==" seeked ")
stalled1 = sum(videoActionsX_T1$type==" stalled ")
error2 = sum(videoActionsX_T2$type==" error ")
pause2 = sum(videoActionsX_T2$type==" pause ")
play2 = sum(videoActionsX_T2$type==" play ")
ratechange2 = sum(videoActionsX_T2$type==" ratechange ")
seeked2 = sum(videoActionsX_T2$type==" seeked ")
stalled2 = sum(videoActionsX_T2$type==" stalled ")
error3 = sum(videoActionsX_T3$type==" error ")
pause3 = sum(videoActionsX_T3$type==" pause ")
play3 = sum(videoActionsX_T3$type==" play ")
ratechange3 = sum(videoActionsX_T3$type==" ratechange ")
seeked3 = sum(videoActionsX_T3$type==" seeked ")
stalled3 = sum(videoActionsX_T3$type==" stalled ")
data<-data.frame(anon_ID=userX,
error1 = error1,
pause1 = pause1,
play1 = play1,
ratechange1 = ratechange1,
seeked1=seeked1,
stalled1=stalled1,
error2 = error2,
pause2 = pause2,
play2 = play2,
ratechange2 = ratechange2,
seeked2 =seeked2,
stalled2 = stalled2,
error3 = error3,
pause3 = pause3,
play3 = play3,
ratechange3 = ratechange3,
seeked3 = seeked3,
stalled3 = stalled3)
return(data)
}
videoLectureActions<-structure(list(username = c("exampleID1", "exampleID1", "exampleID1",
"exampleID2", "exampleID2", "exampleID2", "exampleID3", "exampleID3",
"exampleID3", "exampleID3"), currentTime = c("103.701247", "103.701247",
"107.543877", "107.543877", "116.456507", "116.456507", "119.987188",
"177.816693", "183.417124", "183.417124"), playbackRate = c("null",
"null", "null", "null", "null", "null", "null", "null", "null",
"null"), pause = c("true", "false", "true", "false", "true",
"false", "true", "false", "true", "false"), error = c("null",
"null", "null", "null", "null", "null", "null", "null", "null",
"null"), networkState = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1"), readyState = c("4", "4", "4", "4", "4", "4",
"4", "4", "4", "4"), lectureID = c("exampleLectureID1", "exampleLectureID1",
"exampleLectureID1", "exampleLectureID1", "exampleLectureID1",
"exampleLectureID1", "exampleLectureID1", "exampleLectureID1",
"exampleLectureID1", "exampleLectureID1"), eventTimestamp = c("2013-03-04 18:51:49",
"2013-03-04 18:51:50", "2013-03-04 18:51:54", "2013-03-04 18:51:56",
"2013-03-04 18:52:05", "2013-03-04 18:52:07", "2013-03-04 18:52:11",
"2013-03-04 18:59:17", "2013-03-04 18:59:23", "2013-03-04 18:59:31"
), initTimestamp = c("2013-03-04 18:44:15", "2013-03-04 18:44:15",
"2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15",
"2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15",
"2013-03-04 18:44:15", "2013-03-04 18:44:15"), type = c(" pause ",
" play ", " pause ", " play ", " pause ", " play ", " pause ",
" play ", " pause ", " play "), prevTime = c("103.701247 ", "103.701247 ",
"107.543877 ", "107.543877 ", "116.456507 ", "116.456507 ", "119.987188 ",
"177.816693 ", "183.417124 ", "183.417124 ")), .Names = c("username",
"currentTime", "playbackRate", "pause", "error", "networkState",
"readyState", "lectureID", "eventTimestamp", "initTimestamp",
"type", "prevTime"), row.names = c(1L, 2L, 5L, 6L, 17L, 21L,
28L, 936L, 957L, 988L), class = "data.frame")
data<-structure(list(anon_ID = c("exampleID1", "exampleID2", "exampleID3" ), maxGrade = c(10, 5, 10), firstGrade = c(10, 5, 8), lastGrade = c(10, 5, 10), total_submissions = c(1L, 1L, 3L), Time1 = structure(c(1361993741, 1362356090, 1362357401), class = c("POSIXct", "POSIXt"), tzone = ""), TimeM = structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct", "POSIXt"), tzone = ""), TimeL = structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("anon_ID", "maxGrade", "firstGrade", "lastGrade", "total_submissions", "Time1", "TimeM", "TimeL"), row.names = c(NA, 3L), class = "data.frame")
library(foreach)
library(doMC)
registerDoMC(2) #change the 2 to your number of CPU cores
n <- nrow(data)
res <- list("vector", n)
foreach(i=1:n, .verbose=FALSE, .combine=rbind) %do% {
res[[i]] <- with(data, DataVideoActionT(anon_ID[i], Time1[i], TimeM[i], TimeL[i]))
}
test<-do.call(rbind, res)
I have 3 questions.
How can I make foreach not print to the console? This is how it looks when I run it
foreach(i=1:n, .verbose=FALSE, .combine=rbind) %do% { + res[[i]] <- with(data, DataVideoActionT(anon_ID[i], Time1[i], TimeM[i], TimeL[i])) + } anon_ID error1 pause1 play1 ratechange1 seeked1 stalled1 1 exampleID1 0 0 0 0 0 0 2 exampleID2 0 0 0 0 0 0 3 exampleID3 0 0 0 0 0 0 error2 pause2 play2 ratechange2 seeked2 stalled2 error3 pause3 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 3 0 2 2 0 0 0 0 2 play3 ratechange3 seeked3 stalled3 1 0 0 0 0 2 0 0 0 0 3 2 0 0 0
I don't want that in the console with thousands of observations.
I want to run this in parallel, I i change the %do% for %dopar% the code stop working. Instead of getting test with 3 observations and 19 variables I get a 2x1 character matrix
Is there a better way of doing this? If so, could you explain why is better?
Thanks!