我刚刚发现 `mgcv::s()` 允许为其 `by` 参数提供一个矩阵,从而让一个连续变量针对一组变量中的每一个(如果需要,还包括它们的交互作用)分别进行单独的平滑处理。但是,我无法从此类模型中获得合理的预测,例如:
library(mgcv) #for gam
library(ggplot2) #for plotting
# Simulate a small data set: every combination of two +/-1 coded variables
# (var1, var2) crossed with an integer covariate z running from -10 to 10.
set.seed(1) # fixed seed so the example is reproducible
myData <- expand.grid(
  var1 = c(-1, 1),
  var2 = c(-1, 1),
  z = -10:10
)
# The response is standard-normal noise plus two quadratic curves in z,
# one scaled by var1 and one scaled by var2; the two effects are additive.
myData$y <- with(myData, {
  noise <- rnorm(length(z))
  noise + (z^2 + z * 4) * var1 + (3 * z^2 + z) * var2
})
# Plot the simulated response against z, drawing one line per var1/var2
# combination (colour encodes var1, line type encodes var2).
ggplot(
  data = myData,
  mapping = aes(
    x = z,
    y = y,
    colour = factor(var1),
    linetype = factor(var2)
  )
) +
  geom_line(alpha = 0.5)
# Build two-column matrices: zMat repeats z in both columns, xMat holds
# var1 in its first column and var2 in its second.
zMat <- matrix(rep(myData$z, times = 2), ncol = 2)
xMat <- matrix(c(myData$var1, myData$var2), ncol = 2)
# Fit a GAM with a single s() term whose covariate and `by` are matrices.
# NOTE(review): per mgcv's linear.functional.terms help page, matrix
# arguments appear to invoke a summation convention (one shared smooth f,
# contributing sum_j f(zMat[i, j]) * xMat[i, j]) rather than a separate
# smooth per column -- confirm against the mgcv documentation.
fit <- gam(
  formula = myData$y ~ s(zMat, by = xMat, k = 5)
)
# Attach the fitted values from the model to a copy of the data, then draw
# them with the same aesthetics as the raw-data plot for comparison.
predicted <- myData
predicted$value <- predict(fit)
ggplot(
  data = predicted,
  mapping = aes(
    x = z,
    y = value,
    colour = factor(var1),
    linetype = factor(var2)
  )
) +
  geom_line(alpha = 0.5)
上面的代码生成了如下输入数据图:
以及如下明显错误的预测值图:
而如果将上面的 gam 拟合替换为:
# Alternative fit: two separate smooths of z, one multiplied by var1 and
# one multiplied by var2, with variables looked up in myData.
fit <- gam(
  formula = y ~ s(z, by = var1, k = 5) + s(z, by = var2, k = 5),
  data = myData
)
而其余代码保持不变,则会得到如下合理的预测值图:
我在这里做错了什么?