我有以下数据框https://www.dropbox.com/s/c02qu7uobvrc8ku/college_Rda
这是数据样本:(copy+paste
'able)
educational_history <- structure(list(SCH_COLLEGE_STATUS_1997_09 = structure(c(1L, 1L,
1L, 1L, 5L, 1L, 1L, 5L, 5L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1998_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1999_09 = structure(c(3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2000_09 = structure(c(3L,
3L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2001_09 = structure(c(3L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2002_09 = structure(c(3L,
3L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2003_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2004_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2005_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2006_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2007_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2008_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2009_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2010_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2011_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), PUBID = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10)), .Names = c("SCH_COLLEGE_STATUS_1997_09",
"SCH_COLLEGE_STATUS_1998_09", "SCH_COLLEGE_STATUS_1999_09", "SCH_COLLEGE_STATUS_2000_09",
"SCH_COLLEGE_STATUS_2001_09", "SCH_COLLEGE_STATUS_2002_09", "SCH_COLLEGE_STATUS_2003_09",
"SCH_COLLEGE_STATUS_2004_09", "SCH_COLLEGE_STATUS_2005_09", "SCH_COLLEGE_STATUS_2006_09",
"SCH_COLLEGE_STATUS_2007_09", "SCH_COLLEGE_STATUS_2008_09", "SCH_COLLEGE_STATUS_2009_09",
"SCH_COLLEGE_STATUS_2010_09", "SCH_COLLEGE_STATUS_2011_09", "PUBID"
), row.names = c(NA, 10L), class = "data.frame")
我想使用该数据生成一个新的数据框。
我只需要两个字段:PUBID 和就读 4 年制大学的第一年。有关年份的信息在列的名称中。我试过了:
FirstYear4C <- function(ID) {
ndX=which(educational_history$PUBID==ID)
educational_historyNdX=educational_history[ndX,]
year=NA
if (educational_historyNdX$SCH_COLLEGE_STATUS_1997_09=="Enrolled in 4-year college"){
year=1997
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1998_09=="Enrolled in 4-year college"){
year=1998
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1999_09=="Enrolled in 4-year college"){
year=1999
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_2000_09=="Enrolled in 4-year college"){
year=2000
return(year)
}
return(NA)
}
FirstYear<-unlist(lapply(X=educational_history$PUBID,FirstYear4C))
FourYearCollege<-data.frame(PUBID=educational_history$PUBID,
FirstYear=FirstYear)
我确信有更好的方法来编写该函数。必须逐列复制和粘贴似乎非常低效。
PUBID 1stYear4YC
1 1999
2 2000
...
6 2000