3

我希望找到使用 R 的 fitdistr 函数 (MLE) 截断的分布的 weibull 形状和尺度参数。使用树木直径的数据样本(其中最小的是 2.8):

data<-c(42.7,18.8,30.0,20.3,32.5,18.8,16.0,42.9,18.8,17.3,21.1,23.4,15.0,16.8,15.2,15.0,14.7,17.3,20.1,18.3,16.0,15.7,21.3,
    19.1,17.3,17.0,17.3,17.5,21.6,15.7,12.7,13.2,3.6,3.6,3.6,3.6,3.6,3.6,3.6,3.6,3.6,3.6,3.6,3.6,2.8,2.8,2.8,2.8,2.8,
    2.8,2.8,2.8,2.8,2.8,2.8,2.8,12.2,12.2,12.2,12.2,12.2,12.2,12.2,12.2,12.2,12.2,12.2,12.2,4.3,4.3,4.3,4.3,4.3,4.3,
    4.3,4.3,4.3,4.3,4.3,4.3,2.8,2.8,2.8,2.8,2.8,2.8,2.8,2.8,2.8,2.8,2.8,2.8,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,
    5.6,5.6,18.0,16.3,34.8,17.5,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.1,6.3,6.3,6.3,6.3,6.3,6.3,6.3,6.3,6.3,
    6.3,6.3,6.3,9.4,9.4,9.4,9.4,9.4,9.4,9.4,9.4,9.4,9.4,9.4,9.4,3.8,3.8,3.8,3.8,3.8,3.8,3.8,3.8,3.8,3.8,3.8,3.8)

library(MASS)
wb<-fitdistr(data,'weibull',lower=0.1) # MLE for weibull parameter determination
wb

结果:形状比例
1.36605920 9.97356797

给定数据分布,人们会期望负单调曲线(例如 shape<1)。然而,这些结果表明 shape>1,因为 fitdistr 没有考虑数据被截断的事实。在其他地方,建议如下:

ltwei<-function(x,shape,scale=1,log=FALSE){dweibull(x,shape,scale,log)/pweibull(1,shape,scale,lower=FALSE) } 
ltweifit<-fitdistr(data,ltwei,start=list(shape=1,scale=10)) 
ltweifit 

但这会导致更大的威布尔形状值。如何为考虑数据左截断的分布生成形状和比例参数?提前谢谢了。

4

1 回答 1

1

这是对给出的建议的修改,因为我认为分母不正确。尝试将分母更改为1-pweibull(trunc, ...)看看这是否会产生更好的拟合:

> ptwei <- function(x, shape, scale , log = FALSE) 
+               pweibull(x, shape, scale, log)/(1-
+                 pweibull(2.8, shape, scale, lower=FALSE))
> dtwei <- function(x, shape, scale , log = FALSE) 
+               dweibull(x, shape, scale, log)/(1-
+                 pweibull(2.8, shape, scale, lower=FALSE))
> fitdist(data, dtwei, start=list(shape=1.2, scale=4))
Fitting of the distribution ' twei ' by maximum likelihood 
Parameters:
       estimate Std. Error
shape 0.4555414 0.02660275
scale 0.8751571 0.12236256

我现在意识到 Ripley 教授正在使用 lower 参数来实现我在上面所做的事情,因此只要使用ltwei调用该函数,原始代码就可以工作lower = FALSE

------------

这是根据 2008 年 10 月 7 日 Rhelp 发布的 Brian Ripley 教授的建议:

ltwei <- function(x, shape, scale = 1, log = FALSE) 
              dweibull(x, shape, scale, log)/
                pweibull(2.8, shape, scale, lower=FALSE)

威布尔密度通过在截断点除以 CMF 来归一化。(使用 lower=FALSE,pweibull 函数返回从截断点到 的积分密度Inf。)

library(MASS)
wb<-fitdistr(data,ltwei,start=list(shape=1,scale=1) ) 

There were 50 or more warnings (use warnings() to see the first 50)
> wb
      shape         scale   
   1.81253163   12.72912552 
 ( 0.07199877) ( 0.54855731)
> warnings()[1:5]
Warning messages:
1: In dweibull(x, shape, scale, log) : NaNs produced
2: In pweibull(2.8, shape, scale, lower = FALSE) : NaNs produced
3: In dweibull(x, shape, scale, log) : NaNs produced
4: In pweibull(2.8, shape, scale, lower = FALSE) : NaNs produced
5: In dweibull(x, shape, scale, log) : NaNs produced
> library(MASS)
> wb<-fitdistr(data,ltwei,start=list(shape=1.1,scale=10) ) 
> wb
      shape         scale   
   1.81253113   12.72912870 
 ( 0.07199877) ( 0.54855782)

您可以看到一些起始值会生成警告,但算法仍然成功。如果您从更接近“真实值”的值开始,则不会出现警告。恐怕你的期望没有得到满足。有时对 Weibull 分布的参数化存在混淆,因为它们的处理方式有不同的约定。这就是 Terry Therneau 在他的生存::survreg 帮助页面中的内容:

# There are multiple ways to parameterize a Weibull distribution. The survreg 
# function imbeds it in a general location-scale familiy, which is a 
# different parameterization than the rweibull function, and often leads
# to confusion.
#   survreg's scale  =    1/(rweibull shape)
#   survreg's intercept = log(rweibull scale)
#   For the log-likelihood all parameterizations lead to the same value.

由于您似乎对我的结果不满意,因此fitdistr我也fitdist从“fitdistrplus”包中运行并得到了几乎相同的答案。我仍然认为您需要检查参数的解释,并且可能还需要检查这必然是 Weibull 分布的数据的关键假设:

 dtwei <- function(x, shape, scale , log = FALSE) 
               dweibull(x, shape, scale, log)/
                 pweibull(2.8, shape, scale, lower=FALSE)
 
ptwei <- function(x, shape, scale , log = FALSE) 
               pweibull(x, shape, scale, log)/
                 pweibull(2.8, shape, scale, lower=FALSE)
fitdist(data, dtwei, start=list(shape=1.2, scale=4))
#----------------
Fitting of the distribution ' twei ' by maximum likelihood 
Parameters:
       estimate Std. Error
shape  1.812706 0.07199987
scale 12.728087 0.54839034
于 2015-01-22T18:26:37.493 回答