0

数据集 new:

"1" "A.Kejriwal Sena"
"2" "Bhanwarlal Sharma"
"3" "Millennium Post"
"4" ""
"5" "Mushkil hai Zindagi"
"6" ""
"7" "niraj"
"8" ""
"9" "Dharmender Malik"
"10"    "S. M. Malik"
"11"    "Hocalwire"
"12"    "mansoor"
"13"    "PRRRK"
"14"    "Narendra Solanki"
"15"    "Dsekid"
"16"    "Rutvik Subhedar"
"17"    "Liberator Furiosa"
"18"    "The Anarchy Man  "
"19"    "Swamidutta"
"20"    "Phaneendra"
"21"    "Rutvik Subhedar"
"22"    "Rutvik Subhedar"
"23"    "S.Mehrotra"
"24"    "Mrigen Sharma"
"25"    "Arvind Kejriwal"
"26"    "Bitter Pills"
"27"    "Tarush Bhalla"
"28"    "Americai Narayanan"
"29"    "Rupsa Sata Durga"
"30"    "Dr Sudhakar Phulekar"
"31"    "!"
"32"    "Lala Rahul"
"33"    "Rakesh panda"
"34"    "Jayesh Mehta"
"35"    "Kuldeep Bhan"
"36"    "K K Raman"
"37"    "Kaliya"
"38"    ""
"39"    "Rana Dev Rajbanshi"
"40"    "Rahul Nirmal"
"41"    "Satya Prakash Tyagi"
"42"    "Ashutosh Singh"
"43"    "JMS:"
"44"    ""
"45"    "akif bhati"
"46"    "Arun Joseph"
"47"    "#IamAAP"
"48"    "suryanraju18 jaihind"
"49"    "PANKAJ YADAV"
"50"    "Satya Prakash Tyagi"
"51"    "Rohit TK"
"52"    "Adarsh Choudhary"
"53"    "Xtreme Nationalist"
"54"    "KCAggarwal"
"55"    "NANDKISHORE SHARMA"
"56"    "Uttam Dutta"
"57"    "P K Agarwal"
"58"    "Uttam Dutta"
"59"    "Deol"
"60"    "Rkmishra"
"61"    "Suneel Kumar"
"62"    "Rkmishra"
"63"    "Mohsin"
"64"    "Ranjeet Krishna"
"65"    "Jaynul Haq Choudhury"
"66"    "Vitthal Mundra"
"67"    "Nagesh H"
"68"    "Suneel Kumar"
"69"    "AHMAD KAMAL"
"70"    "sanjeev"
"71"    "Kaliya"
"72"    "Dinesh"
"73"    "Zoher Malkapurwala"
"74"    "suryanraju18 jaihind"
"75"    "Anand"
"76"    "Xtreme Nationalist"
"77"    "Aalamjeet Rangi"
"78"    ""
"79"    "Vimlendra Vimal"
"80"    "Rana. R.S"
"81"    "RaviVisvesvaraPrasad"
"82"    "Virupaksha hs"
"83"    "Siddharth"
"84"    "Millennium Post"
"85"    "Kishanpal"
"86"    "Santhosh Kolkunda"
"87"    "Surabhi Agarwal"
"88"    "Hocalwire"
"89"    "Rutvik Subhedar"
"90"    "Informed Indians "
"91"    "P.r Meghwanshi"
"92"    "Rajat "
"93"    "Zooni khan"
"94"    "real indian"
"95"    "Rahul Nirmal"
"96"    "P.r Meghwanshi"
"97"    "Bibhav"
"98"    ""
"99"    ""
"100"   "K Padma Rani"
"101"   "Ganesha"
"102"   "Xtreme Nationalist"
"103"   "love humanity"
"104"   "JeSuis Rohith Vemula"
"105"   "samira"
"106"   "Munendr Sharma"
"107"   "Rawat Singh Tomar"
"108"   "Raspal kaur"
"109"   "Vivek Gupta"
"110"   "Syed Zia"
"111"   "suryanraju18 jaihind"
"112"   "{AAP "
"113"   "Shravan Mansanpally"
"114"   "sghosh"
"115"   "Lakshmi Srikanth"
"116"   "Sanjaybjp"
"117"   "Razzak Ali Khan"
"118"   "Deepanita Mazumder"
"119"   "belvin vaz"
"120"   "Amit Kumar"
"121"   "#SherDilKejriwal"
"122"   "anuj"
"123"   "Sambi Reddy"
"124"   "Ranjan Kumar Jha"
"125"   "Mohsin"
"126"   "JeSuis Rohith Vemula"
"127"   "Vivek"
"128"   "Dolli"
"129"   "Bharat_Mata_Ki_Jay"
"130"   "Anantkumar"
"131"   "Flower"
"132"   "ARCHANA SINGH"
"133"   "avinash kumar"
"134"   ""
"135"   "Rajesh Mittal"
"136"   "Samik Banerjee"
"137"   "ASHWANI KUMAR GOYAL"
"138"   "Suneel Kumar"
"139"   "Shravan Mansanpally"
"140"   "rajA"
"141"   "Shravan Mansanpally"
"142"   "Mamta Yadav"
"143"   "Dr.Chintan Raval"
"144"   "suryanraju18 jaihind"
"145"   "Dr Sudhakar Phulekar"
"146"   "bilal motorwala"
"147"   "arif007"
"148"   "Dr Sudhakar Phulekar"
"149"   "Rakesh Jaiswal"
"150"   "Dr Sudhakar Phulekar"
"151"   "Prof. Satish Pandey"
"152"   "Mohammad Armanullah"
"153"   "KCAggarwal"
"154"   "Astha Mittal"
"155"   "Rajesh Sharma "
"156"   "Aditya"
"157"   "Rajesh Mittal"
"158"   "Anil Kumar"
"159"   "Niyati"
"160"   "Phronesis Partners"
"161"   "Anand Bhatt"
"162"   "CSS by Design"
"163"   "Naresh Rajput"
"164"   "Engineer Sid"
"165"   "Flower"
"166"   "Rebellion"
"167"   "Mebin"
"168"   "v.asish kumar"
"169"   "Tum se na ho payega!"
"170"   "Ranjan Singh"
"171"   "mohan munya rathod"
"172"   "DINDIGUL CA STUDENTS"
"173"   "Vibha Sachdeva"
"174"   "GT #MRX"
"175"   "Mitesh"
"176"   "Hobbes3103"
"177"   "Azad Swaraj1"
"178"   "NewsBoss.in"
"179"   "INDER MORWAL"
"180"   "kasani sukhadev"
"181"   "Mayur Panghaal"
"182"   "Chin_Chan"
"183"   "Amit Shukla"
"184"   "Mayur Panghaal"
"185"   "INDER MORWAL"
"186"   "mAt global"
"187"   "shamshad shaique"
"188"   "Niraj Bhatia "
"189"   "Aarti"
"190"   "Sudhir Bhardwaj "
"191"   "Abhishek Vishnoi"
"192"   "AAP Delhi Official"
"193"   "WeLove VidyutJammwal"
"194"   "Nagesh H"
"195"   "Vicky Singh Rajput"
"196"   "Lalit Kalra "
"197"   ""
"198"   "raju"
"199"   "knowAguy"
"200"   "Judie Custer"
"201"   "Gibreel Farishta"
"202"   "Onkar Pandey"
"203"   "Sampath Simon"
"204"   "Thammegowda M D"
"205"   "Sickular indian"
"206"   "Truthful"
"207"   "ajay Kumar nirala"
"208"   "ajay Kumar nirala"
"209"   "Farhan"
"210"   "AAPSuratVarachha"
"211"   "siva kumar jagirapu"
"212"   "uniindianews"
"213"   "Rajendra Pande"
"214"   "Kirti Bhushan"
"215"   "Sabrina MzTrueHEART"
"216"   "krishna ts"
(以此类推,共 3683 行)

我在此数据集上应用了 name2sex 函数,结果出现如下错误:

Error in `$<-.data.frame`(`*tmp*`, "gender", value = c(NA, NA, NA, NA,  : 
   replacement has 3961 rows, data has 3683

我用了:

# Load qdap, the package that provides name2sex().
library(qdap)
# Extract the `name` column of the data frame `new` as a character vector.
names <- as.character(new$name)
# Ask name2sex() for a gender prediction for the names.
gender <- name2sex(names)
# Check how many predictions came back; `new` has 3683 rows, but the result
# below is longer, which is the mismatch being reported.
length(gender)
#[1]3961

但是我的数据集 new 只有 3683 行,而不是 3961 行。它似乎只为 a 和 e 这样仅由单个单词组成的描述提供输出,而不是为整个数据集提供输出。我还尝试了另一种方式,即

# Load qdap, the package that provides name2sex().
library(qdap)
# Extract the `name` column of the data frame `new` as a character vector.
names <- as.character(new$name)
# Try to store the predictions directly as a new column of `new`.
# This fails because the replacement has 3961 elements while `new` has
# 3683 rows (see the error reproduced below).
new$gender <- name2sex(names)
#Error in `$<-.data.frame`(`*tmp*`, "gender", value = c(NA, NA, NA, NA,  : 
# replacement has 3961 rows, data has 3683
4

1 回答 1

3

你可以做

# Derive one gender label per element of `names` from the SSA baby-name counts
# in genderdata::ssa_national and store it as the `gender` column of `new`.
#
# Inputs (defined earlier): `names`, a character vector with one entry per row
# of `new`. Names are matched case-insensitively by lower-casing them.
# Result: a character vector of "female", "male", "either", or NA (no match),
# with exactly length(names) elements in the original order.
#
# All dplyr verbs are namespace-qualified so that same-named functions from
# other attached packages (e.g. stats::filter, plyr::rename) cannot be picked
# up by accident. `%>%` must still be available (attach dplyr or magrittr).
new$gender <- genderdata::ssa_national %>%
  # Keep only the reference rows for names that actually occur in the input.
  dplyr::filter(name %in% tolower(names)) %>%
  # Collapse the reference data to one row per name with total counts.
  dplyr::group_by(name) %>%
  dplyr::summarise(
    female = sum(female),
    male = sum(male)
  ) %>%
  dplyr::mutate(
    proportion_male = round(male / (male + female), digits = 4),
    proportion_female = round(female / (male + female), digits = 4),
    gender = ifelse(
      proportion_female == 0.5,
      "either",
      ifelse(proportion_female > 0.5, "female", "male")
    )
  ) %>%
  dplyr::rename(join_name = name) %>%
  {
    # A left join from the input names guarantees one output row per input
    # name, in input order; unmatched names get NA. The right-hand side has
    # one row per name, so no input row can be duplicated.
    dplyr::left_join(
      dplyr::tibble(name = names, join_name = tolower(names)),
      .,
      by = "join_name"
    )
  } %>%
  dplyr::pull(gender)

这基本上就是 name2sex 在幕后所做的事情——你可以运行 debug(name2sex) 自行检查。

另请注意,您应该向该函数提供名字(first name),而不是任意的名称字符串。

于 2017-03-11T13:41:52.007 回答