3

我是 R 新手,我还不完全理解它的计算逻辑......

在以前的帖子的帮助下,我也无法克服我的问题。

我有一个包含 11 个变量的大约 600 个观察值的数据集。我已经成功地运行了多项式模型,但是我无法实现边际效应,因为我的mean()命令正在获取 NA:

数据集:

> head(data,n=50)
    ID time      CHINN      DEBT ERA         INFL     MONEY  OPENNESS    RESERVES      RGDP       RSVS
1  POL 1993 -1.8639720        NA   0   32.8815343  33.47353  41.05223  4276726138 100.00000  4.2767261
2  POL 1994 -0.8081098        NA   0   30.7586977  31.98628  41.26984  6023061884 105.13912  6.0230619
3  POL 1995 -0.1129656        NA   0   26.0622777  31.69277  44.24652 14957024390 112.15091 14.9570244
4  POL 1996 -1.1688280        NA   0   19.1794515  33.19738  46.04792 18018686050 119.00579 18.0186860
5  POL 1997 -1.1688280        NA   0   14.8181395  35.11881  50.74269 20669498248 127.43530 20.6694982
6  POL 1998 -1.1688280        NA   4   11.1353669  37.70208  56.79412 28276350644 133.65597 28.2763506
7  POL 1999 -1.1688280        NA   4    7.0830285  40.69941  54.29511 27314254555 139.74225 27.3142546
8  POL 2000 -1.1688280  32.96343   1   10.7321436  40.60334  60.67079 27469379821 145.66606 27.4693798
9  POL 2001 -1.1688280  41.37857   1    4.3657643  44.43624  57.77678 26563086922 147.25896 26.5630869
10 POL 2002  0.0644257        NA   1    0.4689145  41.98616  60.73279 29783861006 149.32464 29.7838610
11 POL 2003  0.0644257        NA   1   -0.3092132  42.42629  69.31849 33958963841 154.95895 33.9589638
12 POL 2004  0.0644257        NA   1    2.5056441  40.21144  77.33549 36772764782 162.98411 36.7727648
13 POL 2005  0.0644257        NA   1    1.4888388  43.50525  74.91127 42560657450 168.54956 42.5606575
14 POL 2006  0.0644257        NA   1    0.8029315  46.92580  82.51674 48473947849 178.83656 48.4739478
15 POL 2007  0.0644257        NA   1    0.7577241  47.82156  84.38853 65724834811 190.53326 65.7248348
16 POL 2008  0.0644257        NA   1    3.5756117  52.36352  83.76331 62183606786 199.83465 62.1836068
17 POL 2009  0.0644257        NA   1    2.6459580  53.71374  78.80573 79521598778 203.40385 79.5215988
18 POL 2010  0.0644257        NA   1    1.6733804  55.42878  85.68774 93472496388 211.50103 93.4724964
19 POL 2011  0.0644257        NA   1    3.0274845  57.87980  91.29239 97712443397 220.71189 97.7124434
20 BGR 1993         NA        NA   1   68.8945207  78.96788  84.04149  1052450357 100.00000  1.0524504
21 BGR 1994 -1.1688280        NA   1   93.5639075  79.70282  90.72013  1396927258 101.92207  1.3969273
22 BGR 1995 -1.1688280        NA   1   60.0454149  67.02978 101.82717  1635188166 104.48391  1.6351882
23 BGR 1996 -0.9050840        NA   1  120.9697303  83.30492 116.19712   864262494  95.38304  0.8642625
24 BGR 1997 -0.9050840        NA   5 1058.1103740  35.17444 112.09386  2485359931  93.68869  2.4853599
25 BGR 1998 -0.9050840        NA   6   18.0824730  28.90920 117.17808  3056954172  97.96299  3.0569542
26 BGR 1999 -0.9050840        NA   6    2.3810375  30.99876 116.55936  3264673405  99.87455  3.2646734
27 BGR 2000 -0.9050840        NA   6   10.9885883  35.97481 106.26211  3507199103 105.59192  3.5071991
28 BGR 2001 -1.1688280        NA   6    6.2354669  40.92721 106.86605  3646131855 109.94781  3.6461319
29 BGR 2002 -1.1688280        NA   6    4.3788710  41.75304 102.98033  4846429828 114.94803  4.8464298
30 BGR 2003 -0.9050840        NA   6    1.0599753  46.25984 107.38463  6825720096 120.93287  6.8257201
31 BGR 2004 -0.6413398        NA   6    5.2752300  51.21173 115.32294  9337108247 128.76534  9.3371082
32 BGR 2005 -0.3775955        NA   6    4.4206316  55.53535  96.16404  8697081229 136.82237  8.6970812
33 BGR 2006  2.1752650        NA   6    6.9496009  61.91483 140.00543 11756001804 145.51154 11.7560018
34 BGR 2007  2.4390090        NA   6    6.7721512  69.93833 138.64999 17544560083 154.43412 17.5445601
35 BGR 2008  2.4390090        NA   6   11.5750056  66.12314 136.94939 17930378450 163.65291 17.9303784
36 BGR 2009  2.4390090  13.82140   6    1.5731668  69.86327 103.84837 18522120691 154.75483 18.5221207
37 BGR 2010  2.4390090  14.93887   6    1.4049372  71.96152 116.71516 17223201500 155.40066 17.2232015
38 BGR 2011  2.4390090  15.44620   6    2.9890214  75.58550 132.27417 17215734344 158.00846 17.2157343
39 CYP 1993 -0.1129656        NA   5    0.8698960 129.36758  95.41637  1276350873 100.00000  1.2763509
40 CYP 1994 -0.1129656        NA   5    2.2051585 131.48065  95.77615  1640273818 105.77324  1.6402738
41 CYP 1995 -0.1129656        NA   5    0.6063991 128.64850 100.10371  1294935266 111.96553  1.2949353
42 CYP 1996 -1.1688280        NA   5    2.3411552 136.94871 104.44163  1704484151 114.02390  1.7044842
43 CYP 1997 -1.1688280        NA   5    3.3418671 145.38644 105.39959  1525648570 116.75559  1.5256486
44 CYP 1998 -1.1688280 261.72872   4    1.6379206 147.00670 100.52824  1512684182 122.55893  1.5126842
45 CYP 1999 -1.1688280 155.75745   4    1.4380285 159.30150 101.75789  1967461070 128.47057  1.9674611
46 CYP 2000 -1.1688280 156.25023   4    4.8139861 161.29472 109.87129  1868515552 135.00970  1.8685156
47 CYP 2001 -1.1688280 160.87807   4    0.8515728 169.52762 109.90206  2396061109 140.39178  2.3960611
48 CYP 2002 -1.1688280 170.52011   4    1.3698956 175.62242 103.06500  3181358048 143.20035  3.1813580
49 CYP 2003 -0.1129656 179.11164   4    3.0419336 174.50803  95.17382  3450932640 145.59528  3.4509326
50 CYP 2004  1.3840320 180.22661   4    1.2153140 171.42676  98.02155  4113824175 151.47324  4.1138242

用于转换数据的命令:

pdata<-plm.data(data,index=c("ID","time"))
mldata<-mlogit.data(pdata,choice="ERA",shape="wide")

均值矩阵的命令:

z<-with(mldata,data.frame(CHINN=mean(CHINN),DEBT=mean(DEBT),INFL=mean(INFL),MONEY=mean(MONEY),OPENNESS=mean(OPENNESS),RGDP=mean(RGDP),RSVS=mean(RSVS)))

我得到的输出:

  CHINN DEBT INFL MONEY OPENNESS     RGDP RSVS
1    NA   NA   NA    NA       NA 133.4633   NA

您能否就此命令的 NA 原因提出建议?

如果它不能为主要是 NAs 的 DEBT 提供正确的输出,我会理解,但为什么它不计算 RSVS、CHINN 和其他人的平均值?

为什么要计算 RGDP 的平均值?两个变量都有:

> class(mldata$CHINN)
[1] "numeric"
> class(mldata$RGDP)
[1] "numeric"

如何克服这个问题?

更新:

感谢 David Arenburg 的评论,我设法计算出平均值:

> z
     CHINN     DEBT     INFL   MONEY OPENNESS     RGDP     RSVS
1 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734

但是,在计算边际效应时会出现另一个错误:

effects(mlogit.data2,covariate="CHINN",data=z)
Error in predict.mlogit(object, data) : 
  the number of rows of the data.frame should be a multiple of the number of alternatives

在复制平均值 5 次以获得 6 行的矩阵 zz 后,有 6 个备选方案:

> zz
     CHINN     DEBT     INFL   MONEY OPENNESS     RGDP     RSVS
1 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
2 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
3 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
4 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
5 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
6 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734

effects() 命令给出一个输出:

> effects(mlogit.data2,covariate=c("CHINN","DEBT","INFL","MONEY","OPENNESS","RGDP","RSVS"),data=zz)
            2             0             1             4             5             6 
-1.288185e-02 -1.933325e-02 -8.801676e-06  1.095252e-02  2.120814e-02  6.324152e-05 

但是我无法对这个输出得出任何结论——我不知道我可以将这些边际效应分配给哪个变量。

当我尝试一个一个运行时,我得到另一个错误:

> effects(mlogit.data2,covariate=c("CHINN"),data=zz)
Error in if (rhs %in% c(1, 3)) { : argument is of length zero

模型命令:

mlogit.data2<-mlogit(ERA~1|CHINN+INFL+MONEY+OPENNESS+RGDP+RSVS+DEBT,data=mldata,reflevel="4")

我正在使用的软件包:

library(Formula)
library(miscTools)
library(lattice)
library(zoo)
library(sandwich)
library(maxLik)
library(lmtest)
library(statmod)
library(mlogit)
library(plm)

提前谢谢你, Zyta

4

2 回答 2

2

好的,似乎我有一种解决方案:

http://www.talkstats.com/showthread.php/44314-calculate-marginal-effects-using-mlogit-package

如何查看函数的源代码?

显然这足以看到:

> getAnywhere(effects.mlogit)
A single object matching ‘effects.mlogit’ was found
It was found in the following places
  registered S3 method for effects from namespace mlogit
  namespace:mlogit
with value

function (object, covariate = NULL, type = c("aa", "ar", "rr", 
    "ra"), data = NULL, ...) 
{
    type <- match.arg(type)
    if (is.null(data)) {
        P <- predict(object, returnData = TRUE)
        data <- attr(P, "data")
        attr(P, "data") <- NULL
    }
    else P <- predict(object, data)
    newdata <- data
    J <- length(P)
    alt.levels <- names(P)
    pVar <- substr(type, 1, 1)
    xVar <- substr(type, 2, 2)
    cov.list <- lapply(attr(formula(object), "rhs"), as.character)
    rhs <- sapply(cov.list, function(x) length(na.omit(match(x, 
        covariate))) > 0)
    rhs <- (1:length(cov.list))[rhs]
    eps <- 1e-05
    if (rhs %in% c(1, 3)) {
        if (rhs == 3) {
            theCoef <- paste(alt.levels, covariate, sep = ":")
            theCoef <- coef(object)[theCoef]
        }
        else theCoef <- coef(object)[covariate]
        me <- c()
        for (l in 1:J) {
            newdata[l, covariate] <- data[l, covariate] + eps
            newP <- predict(object, newdata)
            me <- rbind(me, (newP - P)/eps)
            newdata <- data
        }
        if (pVar == "r") 
            me <- t(t(me)/P)
        if (xVar == "r") 
            me <- me * matrix(rep(data[[covariate]], J), J)
        dimnames(me) <- list(alt.levels, alt.levels)
    }
    if (rhs == 2) {
        newdata[, covariate] <- data[, covariate] + eps
        newP <- predict(object, newdata)
        me <- (newP - P)/eps
        if (pVar == "r") 
            me <- me/P
        if (xVar == "r") 
            me <- me * data[[covariate]]
        names(me) <- alt.levels
    }
    me
}
<environment: namespace:mlogit>

然后复制函数并修改其第 16 行:

myeffects<-function (object, covariate = NULL, type = c("aa", "ar", "rr", 
                                             "ra"), data = NULL, ...) 
{
    type <- match.arg(type)
    if (is.null(data)) {
        P <- predict(object, returnData = TRUE)
        data <- attr(P, "data")
        attr(P, "data") <- NULL
    }
    else P <- predict(object, data)
    newdata <- data
    J <- length(P)
    alt.levels <- names(P)
    pVar <- substr(type, 1, 1)
    xVar <- substr(type, 2, 2)
    cov.list <- strsplit(as.character(attr(formula(object), "rhs")), " + ", fixed = TRUE)
    rhs <- sapply(cov.list, function(x) length(na.omit(match(x, 
                                                             covariate))) > 0)
    rhs <- (1:length(cov.list))[rhs]
    eps <- 1e-05
    if (rhs %in% c(1, 3)) {
        if (rhs == 3) {
            theCoef <- paste(alt.levels, covariate, sep = ":")
            theCoef <- coef(object)[theCoef]
        }
        else theCoef <- coef(object)[covariate]
        me <- c()
        for (l in 1:J) {
            newdata[l, covariate] <- data[l, covariate] + eps
            newP <- predict(object, newdata)
            me <- rbind(me, (newP - P)/eps)
            newdata <- data
        }
        if (pVar == "r") 
            me <- t(t(me)/P)
        if (xVar == "r") 
            me <- me * matrix(rep(data[[covariate]], J), J)
        dimnames(me) <- list(alt.levels, alt.levels)
    }
    if (rhs == 2) {
        newdata[, covariate] <- data[, covariate] + eps
        newP <- predict(object, newdata)
        me <- (newP - P)/eps
        if (pVar == "r") 
            me <- me/P
        if (xVar == "r") 
            me <- me * data[[covariate]]
        names(me) <- alt.levels
    }
    me
}

现在结果如下:

 > myeffects(mlogit.data2,covariate="RSVS",data=zz)
            2             0             1             4             5             6 
 3.612318e-03  5.368693e-04 -4.903995e-08 -5.382731e-03  1.238768e-03 -5.175053e-06
于 2014-09-14T12:17:46.473 回答
1

你可以使用colMeans

  op <- options(scipen= 100, digits=2)
  colMeans(mldata[,3:11], na.rm=TRUE)
   #  CHINN           DEBT            ERA           INFL          MONEY 
   #  -0.27         115.25           0.20          33.66          74.66 
   #OPENNESS       RESERVES           RGDP           RSVS 
   #  92.06 18809465124.14         136.56          18.81 
  options(op)

summarise_eachdplyr

library(dplyr)
mldata %>%
summarise_each(funs(round(mean(., na.rm=TRUE),2)), CHINN:RSVS)
# CHINN   DEBT ERA  INFL MONEY OPENNESS    RESERVES   RGDP  RSVS
#1 -0.27 115.25 0.2 33.66 74.66    92.06 18809465124 136.56 18.81
于 2014-09-14T10:05:17.710 回答