将货币（薪资）字符串转换为数值：
# Example data: salary brackets as free text plus an education level.
df <- data.frame(
  sal = c("$100,001 - $150,000", "over $150,000", "$25,000"),
  educ = c("High School Diploma", "Current Undergraduate", "PhD"),
  stringsAsFactors = FALSE
)
# Strip thousands separators and dollar signs.
temp <- gsub("[,$]", "", df$sal)
# Strip remaining words (e.g. "over"), leaving digits, spaces and "-".
temp <- gsub("[[:alpha:]]", "", temp)
# Average the bounds of each range; a single amount is its own mean.
# vapply() always returns a numeric vector (even for zero rows), whereas
# sapply() would fall back to a list. The "-" is matched literally.
df$ave.sal <- vapply(
  strsplit(temp, "-", fixed = TRUE),
  function(bounds) mean(as.numeric(bounds)),
  numeric(1)
)
对于教育水平——如果您需要将其转换为数值编码：
# Encode education as its position in the ordered list of levels
# (1 = lowest); values not in the list, including NA, become NA.
df$educ.f <- as.numeric(match(
  df$educ,
  c("High School Diploma", "Current Undergraduate", "PhD")
))
df
# Expected output:
#                   sal                  educ  ave.sal educ.f
# 1 $100,001 - $150,000   High School Diploma 125000.5      1
# 2       over $150,000 Current Undergraduate 150000.0      2
# 3             $25,000                   PhD  25000.0      3
编辑
数据中的缺失值（NA）不会影响上述转换：
# Same example data, now with one missing value in each column.
df <- data.frame(
  sal = c("$100,001 - $150,000", "over $150,000", "$25,000", NA),
  educ = c(NA, "High School Diploma", "Current Undergraduate", "PhD"),
  stringsAsFactors = FALSE
)
对这份数据重新运行上述命令，得到：
# Print the data frame. Expected output once the salary and education
# conversion steps have been re-run on the data containing NA values:
df
#                   sal                  educ  ave.sal educ.f
# 1 $100,001 - $150,000                  <NA> 125000.5     NA
# 2       over $150,000   High School Diploma 150000.0      1
# 3             $25,000 Current Undergraduate  25000.0      2
# 4                <NA>                   PhD       NA      3