这是根据 Thomas 的建议、尝试使用 rpubmed 得到的答案。它从问题中的 DOI 列表出发,先用 RISmed 包中的 EUtilsSummary 函数查找对应的 PubMed ID,然后用取自 GitHub 上 rpubmed 并稍作修改的代码(复制在下方)获取这些 ID 对应的期刊数据。很抱歉改动了 rpubmed 的代码,但原代码第 44 行用到的对象似乎既没有定义,也并非必不可少,所以我把它们删掉了。
library(RCurl); library(XML); library(RISmed); library(multicore)
# Dummy list of 5 DOIs. The real list has 2012 entries, hence the parallel
# lookup below.
dois <- c(
  "10.1371/journal.pone.0046711",
  "10.1371/journal.pone.0046681",
  "10.1371/journal.pone.0046643",
  "10.1371/journal.pone.0041465",
  "10.1371/journal.pone.0044562"
)
# Query PubMed once per DOI. Iterating over `dois` directly (rather than
# `1:length(dois)`) stays correct for an empty vector. `mclapply` is taken
# from base R's `parallel` package, which superseded `multicore`.
res <- parallel::mclapply(dois, EUtilsSummary)
# Collect the PubMed IDs as one flat vector. `sapply` would silently return a
# list if any query yielded zero or several IDs, which would then be pasted
# into the fetch URL as garbage; flattening keeps only real IDs.
ids <- unlist(lapply(res, QueryId), use.names = FALSE)
######## rpubmed functions from https://github.com/rOpenHealth/rpubmed/blob/master/R/rpubmed_fetch.R
# Fetch PubMed records for `ids` in batches of at most `chunk_size` IDs and
# concatenate the per-batch results into a single object.
#   ids        - vector of PubMed IDs
#   chunk_size - maximum number of IDs sent per request
#   delay      - hours to wait before starting (multiplied by 3600 to get
#                the seconds passed to Sys.sleep)
#   ...        - forwarded unchanged to pubmed_fetch()
fetch_in_chunks <- function(ids, chunk_size = 500, delay = 0, ...) {
  wait_seconds <- delay * 3600
  Sys.sleep(wait_seconds)

  id_batches <- chunker(ids, chunk_size)
  fetched <- lapply(id_batches, function(batch) pubmed_fetch(batch, ...))

  # Join the per-batch results end to end, in batch order.
  Reduce(append, fetched)
}
# Download records for the given PubMed IDs from the NCBI EFetch endpoint.
#   ids         - vector of PubMed IDs; comma-joined into a single request
#   file_format - value sent as the `rettype` query parameter
#   as_r_object - if TRUE, parse the response as XML and convert it to a
#                 nested list; otherwise return the raw response text
#   ...         - extra named query parameters appended to the request URL
# Returns the parsed list or the raw response string, depending on
# `as_r_object`.
pubmed_fetch <- function(ids, file_format = "xml", as_r_object = TRUE, ...) {
  args <- c(
    id = paste(ids, collapse = ","),
    db = "pubmed",
    rettype = file_format,
    ...
  )
  url_args <- paste(paste(names(args), args, sep = "="), collapse = "&")
  # NCBI requires HTTPS for E-utilities requests, so the endpoint must not be
  # addressed over plain HTTP.
  base_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=full"
  url_string <- paste(base_url, url_args, sep = "&")
  records <- getURL(url_string)
  # NCBI limits requests to three per second
  Sys.sleep(0.33)
  if (as_r_object) {
    xmlToList(xmlTreeParse(records, useInternalNodes = TRUE))
  } else {
    records
  }
}
# Split `v` into consecutive pieces holding at most `chunk_size` elements.
# Returns a list whose names are the 1-based chunk numbers.
chunker <- function(v, chunk_size) {
  positions <- seq_along(v)
  # Elements 1..chunk_size get index 1, the next chunk_size get 2, and so on.
  chunk_index <- ceiling(positions / chunk_size)
  split(v, chunk_index)
}
###### End of rpubmed functions
d <- fetch_in_chunks(ids)
# Extract the journal name from every fetched record. Iterating over `d`
# itself replaces the hard-coded 1:2012, which indexes past the end of `d`
# whenever fewer than 2012 records come back (e.g. with the 5 dummy DOIs).
# vapply also avoids growing `j` one element at a time and guarantees exactly
# one string per record.
# NOTE(review): the positional path below assumes a fixed record layout;
# confirm it still lands on the journal title for current PubMed XML.
j <- vapply(
  d,
  function(record) as.character(record[[1]][[5]][[1]][[3]]), # the tortuous path to the journal name
  character(1),
  USE.NAMES = FALSE
)