1

我使用 GA 包中的遗传算法函数在非线性模型中进行参数估计。我使用了一个模拟数据,其中所有变量都存在多重共线性问题。当我使用 GA 函数时,我发现我用 GA 函数估计的参数有偏差。你能解释一下为什么会这样吗?使用具有多重共线性数据的 GA 函数估计参数的解决方案是什么?谢谢。

library("lestat")           
beta1<-c(0.1835,0.5932,-0.0065,0.4153,-0.0431)
beta2<-c(0.1,0.2,0.3,0.4)

corr<-matrix(1:64,8,8)
for (i in 1:8){
  for (j in 1:8){
    ifelse (i==j, corr[i,j]<-1, corr[i,j]<-0.8)
  }
}
corr

dist<-mvrnorm(10000,rep(50,8), corr)
dist
pop1<-dist[,1]
pop2<-dist[,2]
pop3<-dist[,3]
pop4<-dist[,4]
pop5<-dist[,5]
pop6<-dist[,6]
pop7<-dist[,7]
pop8<-dist[,8]
resp<-exp(beta1[1])*(pop1^beta1[2])*(pop2^beta1[3])*(pop3^beta1[4])*
      (pop4^beta1[5])*(pop5^beta2[1])*(pop6^beta2[2])*(pop7^beta2[3])*
      (pop8^beta2[4])+e

popgab2<-data.frame(resp,pop1,pop2,pop3,pop4,pop5,pop6,pop7,pop8)
popgab2
min(dist)
max(dist)

#-----------------------------------------------------------------

e<-rnorm(500,0,1)
x1<-sample(pop1,500,replace=FALSE, prob=NULL)
x2<-sample(pop2,500,replace=FALSE, prob=NULL)
x3<-sample(pop3,500,replace=FALSE, prob=NULL)
x4<-sample(pop4,500,replace=FALSE, prob=NULL)
x5<-sample(pop5,500,replace=FALSE, prob=NULL)
x6<-sample(pop6,500,replace=FALSE, prob=NULL)
x7<-sample(pop7,500,replace=FALSE, prob=NULL)
x8<-sample(pop8,500,replace=FALSE, prob=NULL)
y2<-exp(beta1[1])*(x1^beta1[2])*(x2^beta1[3])*(x3^beta1[4])*
    (x4^beta1[5])*(x5^beta2[1])*(x6^beta2[2])*(x7^beta2[3])*
    (x8^beta2[4])+e

data2<-data.frame(y2,x1,x2,x3,x4,x5,x6,x7,x8)
data2

#------------------------------------------------------------------

library(GA)
fit.cd <- function(data=data2, a, b1, b2, b3, b4, b5, b6, b7, b8){
  attach(data2, warn.conflicts=F)
  Y_hat <- exp(a)  * x1^b1 * x2^b2 * x3^b3 * x4^b4 * x5^b5 * x6^b6 *
           x7^b7 * x8^b8
  SSE = t(y2-Y_hat) %*% (y2-Y_hat) #matrix formulation for SSE
  detach(data2)
  return(SSE)
}
Sys.time()->a
GA3<-ga(type="real-valued",
        min=c(-10,-1,-1,-1,-1,-1,-1,-1,-1),
        max=c(10,1,1,1,1,1,1,1,1),
        fitness=function(b) - fit.cd(data2,b[1],b[2],b[3],b[4],b[5],b[6],b[7],b[8],b[9]),
        nBits=32,
        popSize=1000,
        maxiter=1000,
        run=1000,
        pcrossover=0.8,
        pmutation=0.05,
        elitism = base::max(1, round(1000*0.05)),
        maxfitness=Inf,
        names = NULL,
        suggestions = NULL,
        keepBest = FALSE,
        parallel=FALSE,
        monitor=gaMonitor,
        seed=NULL
)
Sys.time()->b
summary(GA3)
plot(GA3)
waktu2<-b-a
solusi<-print(summary(GA3)$solution)
4

0 回答 0