1

我的任务是让一些代码在工作中运行得更快,但我遇到了并行计算的障碍。我有一个原始的 for 循环,我想并行运行。在没有 for 循环的情况下重现代码似乎最容易,而是使用 lapply(),但是它的运行时间与循环一样长。使用 parLapply() 重写代码给了我一个更快的结果,尽管结果与 lapply() 和原始 for 循环的结果不同,两者都具有可比性。该计划的目的是计算各个国家在每个到期日的现金利差(即利差曲线)。

“bonddata.csv”链接:http ://www.mediafire.com/download/hfcdbryhedpso77/bonddata.csv “weocountries.csv”链接: http ://www.mediafire.com/download/7x15csw7lwwataj/weocountries.csv

加载所需的数据和库:

library(reshape2)
library(ggplot2)
library(Rblpapi)
library(xts)
library(YieldCurve)
library(parallel)
library(snow)
library(countrycode)
library(gridExtra)
library(compare)

#Read in the data
countries<-read.csv("weocountries.csv",stringsAsFactors=FALSE,na.strings="")
bonddata <- read.csv("bonddata.csv")
bonddata$iso <- as.character(bonddata$iso)
bonddata$maturity <- as.Date(as.character(bonddata$maturity))
rownames(bonddata) <- as.character(bonddata$X)
bonddata$X <- NULL
bonddata <- na.omit(bonddata)

创建计算现金点差的函数:

calcspreadcurve<-function(data,maturities){
  numbonds<-nrow(data)
  if(numbonds<=3){
    ForceIntercept<-.5*min(data$spread)
    mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
    fits<-mod$coefficients*log(100*maturities)+ForceIntercept
    return(fits)
  }
  else {
    mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
    if(is.numeric(mod)){
      fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*maturities))
    } else {
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*maturities)+ForceIntercept
    }       

    if(min(fits)<0){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*maturities)+ForceIntercept
    } 

    if(mean(fits)>2*mean(data$spread)){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*maturities)+ForceIntercept
    }

    return(fits)
  }
}

# Calculate standardized USD spread curves for all countries
bondcountries<-unique(bonddata$iso)
maturities<-c(1,2,5,10,20,30,50,100)

原代码:

#Original for loop
cashspreads<-NULL
for(i in 1:length(bondcountries)){
  curve<-calcspreadcurve(bonddata[bonddata$iso==bondcountries[i],],maturities)
  cashspreads<-rbind(cashspreads,curve)
}
rownames(cashspreads)<-sapply(bondcountries,function(x) countries$ISO3[which(countries$ISO2==x)])
colnames(cashspreads)<-c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")

lapply() 代码:

#What loop is doing (lapply)
cashspreads2 <- t(as.data.frame(lapply(split(bonddata,bonddata$iso),function(data,m=maturities){
  numbonds<-nrow(data)
  if(numbonds<=3){
    ForceIntercept<-.5*min(data$spread)
    mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
    fits<-mod$coefficients*log(100*m)+ForceIntercept
    return(fits)
  }
  else {
    mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
    if(is.numeric(mod)){
      fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*m))
    } else {
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    }       
    if(min(fits)<0){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    } 
    if(mean(fits)>2*mean(data$spread)){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    }
    return(fits)
  }
})))
rownames(cashspreads2)<-countrycode(rownames(cashspreads2),"iso2c","iso3c")
colnames(cashspreads2) <- c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")

parLapply() 代码:

#Run in parallel (parLapply)
cl <- makeCluster(type="SOCK",detectCores()-1)
cashspreads3 <- t(as.data.frame(parLapply(cl,split(bonddata,bonddata$iso),function(data,m=c(1,2,5,10,20,30,50,100)){
  numbonds<-nrow(data)
  if(numbonds<=3){
    ForceIntercept<-.5*min(data$spread)
    mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
    fits<-mod$coefficients*log(100*m)+ForceIntercept
    return(fits)
  }
  else {
    mod<-try(Nelson.Siegel(data$spread,12*data$averagelife),TRUE)
    if(is.numeric(mod)){
      fits<-as.vector(NSrates(xts(mod,Sys.Date()),12*m))
    } else {
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    }       
    if(min(fits)<0){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    } 
    if(mean(fits)>2*mean(data$spread)){
      ForceIntercept<-.8*mean(data$spread)
      mod<-lm(I(data$spread-ForceIntercept)~0+log(100*data$averagelife))
      fits<-mod$coefficients*log(100*m)+ForceIntercept
    }
    return(fits)
  }
})))
stopCluster(cl)
rownames(cashspreads3)<-countrycode(rownames(cashspreads3),"iso2c","iso3c")
colnames(cashspreads3) <- c("1Y","2Y","5Y","10Y","20Y","30Y","50Y","100Y")

数据框比较:

compare(cashspreads[order(rownames(cashspreads)),],cashspreads2[order(rownames(cashspreads2)),])
compare(cashspreads[order(rownames(cashspreads)),],cashspreads3[order(rownames(cashspreads3)),])

如果代码为您正确运行,则生成的数据帧“cashspreads”和“cashspreads2”将无法与数据帧“cashspreads3”进行比较。

关于为什么这不会产生相同的数据帧的任何想法?

4

0 回答 0