0

我正在使用名为 productQuality 的 CSV 数据集,其中每一行代表一个焊缝类型和该特定焊缝的 beta 分布参数(α 和 β)。我想知道是否有办法计算和列出每种焊接类型的中位数?这是我的数据集的一个输入:

structure(list(weld.type.ID = 1:33, weld.type = structure(c(29L, 
11L, 16L, 4L, 28L, 17L, 19L, 5L, 24L, 27L, 21L, 32L, 12L, 20L, 
26L, 25L, 3L, 7L, 13L, 22L, 33L, 1L, 9L, 10L, 18L, 15L, 31L, 
8L, 23L, 2L, 14L, 6L, 30L), .Label = c("1,40,Material A", "1,40S,Material C", 
"1,80,Material A", "1,STD,Material A", "1,XS,Material A", "10,10S,Material C", 
"10,160,Material A", "10,40,Material A", "10,40S,Material C", 
"10,80,Material A", "10,STD,Material A", "10,XS,Material A", 
"13,40,Material A", "13,40S,Material C", "13,80,Material A", 
"13,STD,Material A", "13,XS,Material A", "14,40,Material A", 
"14,STD,Material A", "14,XS,Material A", "15,STD,Material A", 
"15,XS,Material A", "2,10S,Material C", "2,160,Material A", "2,40,Material A", 
"2,40S,Material C", "2,80,Material A", "2,STD,Material A", "2,XS,Material A", 
"4,80,Material A", "4,STD,Material A", "6,STD,Material A", "6,XS,Material A"
), class = "factor"), alpha = c(281L, 196L, 59L, 96L, 442L, 98L, 
66L, 30L, 68L, 43L, 35L, 44L, 23L, 14L, 24L, 38L, 8L, 8L, 5L, 
19L, 37L, 38L, 6L, 11L, 29L, 6L, 16L, 6L, 16L, 3L, 4L, 9L, 12L
), beta = c(7194L, 4298L, 3457L, 2982L, 4280L, 3605L, 2229L, 
1744L, 2234L, 1012L, 1096L, 1023L, 1461L, 1303L, 531L, 233L, 
630L, 502L, 328L, 509L, 629L, 554L, 358L, 501L, 422L, 566L, 403L, 
211L, 159L, 268L, 167L, 140L, 621L)), class = "data.frame", row.names = c(NA, 
-33L))
4

1 回答 1

2

根据Wikipedia,对于 alpha、beta >1 的中位数有一个近似解,但没有一般的封闭式解。下面我实现蛮力精确解和近似解:

## I_{1/2}^{-1}(alpha,beta)
med_exact0 <- function(alpha,beta,eps=1e-12) {
    uniroot(function(x) pbeta(x,alpha,beta)-1/2,
            interval=c(eps,1-eps))$root
}
med_exact <- Vectorize(med_exact0, vectorize.args=c("alpha","beta"))
med_approx <- function(alpha,beta) (alpha-1/3)/(alpha+beta-2/3)

编辑评论指出,逆(“蛮力”)解决方案已经在基础 R 中实现为qbeta(p=0.5,...)!几乎可以肯定比我的解决方案更健壮和计算效率更高......

我打电话给你的数据dd

evals <- with(dd,med_exact(alpha,beta))
avals <- with(dd,med_approx(alpha,beta))
evals2 <- with(dd,qbeta(0.5,alpha,beta))
max(abs((evals-avals)/evals))  ## 0.0057

在您的数据中最坏的情况下,精确和近似的解决方案相差约 0.6% ...

于 2019-11-27T20:35:21.360 回答