我的任务是让一些代码在工作中运行得更快,但我遇到了并行计算的障碍。我有一个原始的 for 循环,我想并行运行。在没有 for 循环的情况下重现代码似乎最容易,而是使用 lapply(),但是它的运行时间与循环一样长。使用 parLapply() 重写代码给了我一个更快的结果,尽管结果与 lapply() 和原始 for 循环的结果不同,两者都具有可比性。该计划的目的是计算各个国家在每个到期日的现金利差(即利差曲线)。
“bonddata.csv”链接:http ://www.mediafire.com/download/hfcdbryhedpso77/bonddata.csv “weocountries.csv”链接: http ://www.mediafire.com/download/7x15csw7lwwataj/weocountries.csv
加载所需的数据和库:
library(reshape2)
library(ggplot2)
library(Rblpapi)
library(xts)
library(YieldCurve)
library(parallel)
library(snow)
library(countrycode)
library(gridExtra)
library(compare)
#Read in the data
countries<-read.csv("weocountries.csv",stringsAsFactors=FALSE,na.strings="")
bonddata <- read.csv("bonddata.csv")
bonddata$iso <- as.character(bonddata$iso)
bonddata$maturity <- as.Date(as.character(bonddata$maturity))
rownames(bonddata) <- as.character(bonddata$X)
bonddata$X <- NULL
bonddata <- na.omit(bonddata)
创建计算现金点差的函数:
calcspreadcurve<-function(data,maturities){
numbonds<-nrow(data)
if(numbonds<=3){
ForceIntercept<-.5*min(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*maturities)+ForceIntercept
return(fits)
}
else {
mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
if(is.numeric(mod)){
fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*maturities))
} else {
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*maturities)+ForceIntercept
}
if(min(fits)<0){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*maturities)+ForceIntercept
}
if(mean(fits)>2*mean(data$spread)){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*maturities)+ForceIntercept
}
return(fits)
}
}
# Calculate standardized USD spread curves for all countries
bondcountries<-unique(bonddata$iso)
maturities<-c(1,2,5,10,20,30,50,100)
原代码:
#Original for loop
cashspreads<-NULL
for(i in 1:length(bondcountries)){
curve<-calcspreadcurve(bonddata[bonddata$iso==bondcountries[i],],maturities)
cashspreads<-rbind(cashspreads,curve)
}
rownames(cashspreads)<-sapply(bondcountries,function(x) countries$ISO3[which(countries$ISO2==x)])
colnames(cashspreads)<-c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")
lapply() 代码:
#What loop is doing (lapply)
cashspreads2 <- t(as.data.frame(lapply(split(bonddata,bonddata$iso),function(data,m=maturities){
numbonds<-nrow(data)
if(numbonds<=3){
ForceIntercept<-.5*min(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
return(fits)
}
else {
mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
if(is.numeric(mod)){
fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*m))
} else {
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
if(min(fits)<0){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
if(mean(fits)>2*mean(data$spread)){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
return(fits)
}
})))
rownames(cashspreads2)<-countrycode(rownames(cashspreads2),"iso2c","iso3c")
colnames(cashspreads2) <- c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")
parLapply() 代码:
#Run in parallel (parLapply)
cl <- makeCluster(type="SOCK",detectCores()-1)
cashspreads3 <- t(as.data.frame(parLapply(cl,split(bonddata,bonddata$iso),function(data,m=c(1,2,5,10,20,30,50,100)){
numbonds<-nrow(data)
if(numbonds<=3){
ForceIntercept<-.5*min(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
return(fits)
}
else {
mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
if(is.numeric(mod)){
fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*m))
} else {
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
if(min(fits)<0){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
if(mean(fits)>2*mean(data$spread)){
ForceIntercept<-.8*mean(data$spread)
mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
fits<-mod$coefficients*log(100*m)+ForceIntercept
}
return(fits)
}
})))
stopCluster(cl)
rownames(cashspreads3)<-countrycode(rownames(cashspreads3),"iso2c","iso3c")
colnames(cashspreads3) <- c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")
数据框比较:
compare(cashspreads[order(rownames(cashspreads)),],cashspreads2[order(rownames(cashspreads2)),])
compare(cashspreads[order(rownames(cashspreads)),],cashspreads3[order(rownames(cashspreads3)),])
如果代码为您正确运行,则生成的数据帧“cashspreads”和“cashspreads2”将无法与数据帧“cashspreads3”进行比较。
关于为什么这不会产生相同的数据帧的任何想法?