我正在尝试使用 R 中使用 optimx 包的收入估算的最大似然法来估计参数。我已经定义了函数,并且我提供了具有 23 个元素的初始参数向量。在 23 个向量中,第 21 个向量的值被限制为 1。
经过几次迭代,它说达到了收敛,当我试图找出参数值时,它给了我所有参数的“NA”。我的代码是
cat("\014")
data = read.csv("DRCOG.csv")
drcog = data # please make a column in the dataset indicating inc grp. Named as data$grp
param = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) #initial value of the parameter
imput = function(param) { #defining funtion
rn = nrow(drcog)
nc = ncol(drcog)
inc_thres = matrix(c(-100, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.50, 10.0, 13.5, 15.0, 100.0 ),ncol = 12)
inc_grp = ncol(inc_thres) - 1
tinc_thres = t(inc_thres)
#creating matrix for reportage and group variable
# equation for reporting income
reportage = as.matrix(cbind(drcog$uno,drcog$DUGRAD, drcog$AGE5P, drcog$sero, drcog$sero, drcog$sero, drcog$sero, drcog$sero, drcog$sero, drcog$sero))
nparmr = param[1:10] #number of input variables, change in initial parameter value and equation accordingly
grp = as.matrix(cbind(drcog$sero, drcog$sero, drcog$sero, drcog$uno, drcog$NVEH, drcog$MALEP, drcog$RENTED, drcog$EMPLP, drcog$SINGUNI, drcog$NWHITE))
nparmg = param[11:21] #number of input variables in grp variable, change in initial parameter value and equation accordingly
treportage = t(reportage)
tgrp = t(grp)
btheq = cbind(treportage, tgrp)
cncol = ncol(reportage)
gammar = param[1:10] #parameter vector of regression
gammar = as.matrix(gammar)
gammal = param[11:20] #parameter vector of group
gammal = as.matrix(gammal)
#choleskey matrix as input parameter, variance-covariance matrix in the formula
omega_cvar = param[21:23]
omegaL = matrix(data = 0, nrow = 2, ncol =2)
omegaL[1, 1] = omega_cvar[1]
omegaL[2, 1] = omega_cvar[2]
omegaL[2, 2] = omega_cvar[3]
omega = omegaL%*%t(omegaL)
x <- matrix(data=0,nrow =rn,ncol=14) #first col is reporting/missing, 2-12 is probability for each individual and each group
library(pbivnorm)
x[, 1] = matrix(drcog$grp)
# threshold matrix calculation
th <- matrix(data=0,nrow =rn,ncol=2)
for (i in 1:rn){
curr_cat = x[i, 1]
if (curr_cat>0) {
th[i, 1] = inc_thres[1, curr_cat]
th[i, 2] = inc_thres[1, curr_cat+1]
}
}
y11 = reportage%*%gammar
y22 = (grp%*%gammal)
cor1 = -1* cov2cor(omega)
LL = matrix(data = 0, nrow = rn, ncol = 1)
upper_lim = 1e-5
for (i in 1:rn) {
curr_cat = x[i, 1]
if (curr_cat>0) {
th_low = (th[i, 1] - y22[i, ])/omega[2,2]
th_up = (th[i, 2] - y22[i, ])/omega[2,2]
nnn = (pbivnorm(th_up, y11[i, 1], rho = cor1[1, 2]) - pbivnorm(th_low, y11[i, 1], rho = cor1[1, 2]))
if (nnn > upper_lim) {
LL[i, 1] = log(nnn)
}
else {
LL[i, 1] = log(upper_lim)
}
}
else {
nnn = (1 - pnorm(y11[i, 1]))
if (nnn > upper_lim){
LL[i, 1] = log(nnn)
}
else {
LL[i, 1] = log(upper_lim)
}
}
}
return(sum(LL))
}
grad.norm <- function(imput) {
require(numDeriv)
return(sum(grad(imput, param)^2))
}
#mle estimation
#install package stats4
library(stats4)
set.seed(123)
#fit1 = optim(par = param, fn = imput, gr = NULL, lower = c(-Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 1, -Inf, -Inf), upper = c(Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, 1.01, Inf, Inf), method = "L-BFGS-B", control = list(trace = 1, REPORT =1))
library(optimx)
fit1 = optimx(param, imput, gr = NULL, lower = c(-Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 1, -Inf, -Inf), upper = c(Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, 1.01, Inf, Inf), method = "L-BFGS-B", control = list(trace = 1, REPORT =1))
我得到的输出是
Analytic gradient not made available.
Analytic Hessian not made available.
Scale check -- log parameter ratio= 0 log bounds ratio= 0
Method: L-BFGS-B
iter 1 value -63967.987853
iter 2 value -66454.130893
iter 3 value -66870.500976
iter 4 value -68290.490185
iter 5 value -71406.527209
iter 6 value -71569.074462
Error in if (nnn > upper_lim) { : missing value where TRUE/FALSE needed
optim function evaluation failure
Post processing for method L-BFGS-B
Save results from method L-BFGS-B
$fevals
[1] NA
$convcode
[1] 9999
$value
[1] 8.988466e+307
$par
[1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
我尝试过使用其他优化包,例如 maxLik、maxBFGS、optim。出于某种原因,那里也没有收敛。如果您需要用于编译的数据集,请告诉我。