我正在尝试编写自己的函数。有 2 个子集函数用于删除部分数据。第二个子集功能有效,但第一个失败。我检查了用于子集数据的字段的类和级别,一切似乎都正确。代码如下所示:
mdl.sum<-function(seed=0, times=0, method=c("Boostrap", "cv"), alpha=0.05, data, Prev.vars=NULL, varia, depvar, Expo=NULL, WP=NULL, weight, offset=NULL, p=1.6, outfile){
mdlout<-data.frame(var.lev=character(0), Estimate = numeric(0),pvalue=numeric(0), AIC=numeric(0), converged=numeric(0), Varorder=numeric(0), CVGroup = numeric(0) )
set.seed(seed)
data$y<-data[, depvar]/data[, weight]
loc_y<-dim(data)[2]
index<-createResample(data$y, times, list=FALSE)
foreach(j = 1:times) %dopar% {
CV.Grp<-j
train <- data[index[,j],]
for (i in varia) {
if (i %in% Prev.vars){next} else{
if (length(Prev.vars)==0) {
Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[i]), collapse="+")), collapse="~"))
} else{
Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[Prev.vars],names(train)[i]), collapse="+")), collapse="~"))
}
LR.glm<-glm(Mdl.form, family=tweedie(var.power=p, link.power=0),data=train, weights=train[, weight])
temp.coef<-data.frame(coef(summary(LR.glm)))
temp.coef<-cbind(var.lev=row.names(temp.coef), temp.coef)
mdlout<-rbind(mdlout, cbind(temp.coef[,c(1:2,5)], AIC=AICtweedie(LR.glm), converged=(LR.glm$converged+0), Varorder=i, CVGroup = CV.Grp ))
}
}
}
mdlout2<-subset(mdlout, mdlout$converged==1)
mdlout2<-subset(mdlout, mdlout$var.lev!="(Intercept)")
write.csv(mdlout2, file=outfile, row.names = FALSE)
}
然后,运行以下代码:
library(lattice)
library(Matrix)
library(MASS)
library(RODBC)
library(statmod)
library(tweedie)
library(ggplot2)
library(gtools)
library(colorspace)
library(vcd)
library(cvTools)
library(car)
library(caret)
library(foreach)
library(xtable)
mdl.sum(seed=0, times=5, method="Boostrap", alpha=0.2, data=table.1.2,Prev.vars=2, varia=1:3,depvar=7, Expo=4, WP=8, weight=4, offset=10, p=1.7, outfile='D:/b1.csv')
根据 Jdbaba 的建议,我 dput(table.1.2) 如下所示。
> dput(table.1.2)
structure(list(premiekl = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Class",
"Code: 1=Weight over 60kg and more than 2 gears", "Code: 2=Other"
)), moptva = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Age",
"Code: 1=At most 1 year", "Code: 2=2 years or more")), zon = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor", comment = c("Name: Zone",
"Code: 1=Central and semi-central parts of Sweden's three largest cities",
"Code: 2=suburbs and middle-sized towns", "Code: 3=Lesser towns, except those in 5 or 7",
"Code: 4=Small towns and countryside, except 5--7", "Code: 5=Northern towns",
"Code: 6=Northern countryside", "Code: 7=Gotland (Sweden's largest island)"
)), dur = structure(c(62.9, 112.9, 133.1, 376.6, 9.4, 70.8, 4.4,
352.1, 840.1, 1378.3, 5505.3, 114.1, 810.9, 62.3, 191.6, 237.3,
162.4, 446.5, 13.2, 82.8, 14.5, 844.8, 1296, 1214.9, 3740.7,
109.4, 404.7, 66.3), comment = c("Name: Duration", "Unit: year"
)), medskad = structure(c(18256L, 13632L, 20877L, 13045L, 0L,
15000L, 8018L, 8232L, 7418L, 7318L, 6922L, 11131L, 5970L, 6500L,
7754L, 6933L, 4402L, 8214L, 0L, 5830L, 0L, 4728L, 4252L, 4212L,
3846L, 3925L, 5280L, 7795L), comment = c("Name: Claim severity",
"Unit: SEK")), antskad = structure(c(17L, 7L, 9L, 7L, 0L, 1L,
1L, 52L, 69L, 75L, 136L, 2L, 14L, 1L, 43L, 34L, 11L, 8L, 0L,
3L, 0L, 94L, 99L, 37L, 56L, 4L, 5L, 1L), comment = "Name: No. claims"),
riskpre = structure(c(4936L, 845L, 1411L, 242L, 0L, 212L,
1829L, 1216L, 609L, 398L, 171L, 195L, 103L, 104L, 1740L,
993L, 298L, 147L, 0L, 211L, 0L, 526L, 325L, 128L, 58L, 144L,
65L, 118L), comment = c("Name: Pure premium", "Unit: SEK"
)), helpre = structure(c(2049L, 1230L, 762L, 396L, 990L,
594L, 396L, 1229L, 738L, 457L, 238L, 594L, 356L, 238L, 1024L,
615L, 381L, 198L, 495L, 297L, 198L, 614L, 369L, 229L, 119L,
297L, 178L, 119L), comment = c("Name: Actual premium", "Note: The premium for one year according to the tariff in force 1999",
"Unit: SEK")), skadfre = structure(c(0.27027027027027, 0.0620017714791851,
0.067618332081142, 0.0185873605947955, 0, 0.0141242937853107,
0.227272727272727, 0.1476853166714, 0.0821330793953101, 0.0544148588841326,
0.0247034675676167, 0.0175284837861525, 0.017264767542237,
0.0160513643659711, 0.224425887265136, 0.143278550358196,
0.0677339901477833, 0.0179171332586786, 0, 0.036231884057971,
0, 0.111268939393939, 0.0763888888888889, 0.0304551814964195,
0.0149704600743176, 0.036563071297989, 0.0123548307388189,
0.0150829562594268), comment = c("Name: Claim frequency",
"Unit: /year")), offset = structure(c(7.6251071482389, 7.11476944836646,
6.63594655568665, 5.98141421125448, 6.89770494312864, 6.38687931936265,
5.98141421125448, 7.11395610956603, 6.60394382460047, 6.12468339089421,
5.47227067367148, 6.38687931936265, 5.87493073085203, 5.47227067367148,
6.93147180559945, 6.42162226780652, 5.9427993751267, 5.28826703069454,
6.20455776256869, 5.6937321388027, 5.28826703069454, 6.41999492814714,
5.91079664404053, 5.43372200355424, 4.77912349311153, 5.6937321388027,
5.18178355029209, 4.77912349311153), comment = c("Name: Actual premium",
"Note: The premium for one year according to the tariff in force 1999",
"Unit: SEK"))), .Names = c("premiekl", "moptva", "zon", "dur",
"medskad", "antskad", "riskpre", "helpre", "skadfre", "offset"
), row.names = c(NA, -28L), comment = c("Title: Partial casco moped insurance from Wasa insurance, 1994--1999",
"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas", "Copyright: http://www2.math.su.se/~esbj/GLMbook/"
), class = "data.frame")