我权衡了点不完全平滑的模拟生成的曲线。我想拟合样条曲线并计算最大值。由于点的锯齿状特性,尤其是在峰值附近,将样条曲线拟合到整个曲线是没有意义的。相反,我想丢弃曲线的大多数(不是全部)无信息部分(其中点不是均匀地增加/减少),以便我可以拟合更好的样条曲线。
这里有 3 条示例曲线。
structure(list(x = c(0.01, 0.0615789473684211, 0.113157894736842,
0.164736842105263, 0.216315789473684, 0.267894736842105, 0.319473684210526,
0.371052631578947, 0.422631578947368, 0.474210526315789, 0.52578947368421,
0.577368421052632, 0.628947368421053, 0.680526315789474, 0.732105263157895,
0.783684210526316, 0.835263157894737, 0.886842105263158, 0.938421052631579,
0.99), y = c(0.67455086336912, 0.798345993110331, 0.842040269562323,
0.876826597424532, 0.903925678739277, 0.922394814941881, 0.937688904117379,
0.952464844120848, 0.959010185199922, 0.960880828342136, 0.968131047741576,
0.967998560989975, 0.963479216138144, 0.947228114551568, 0.930237609460854,
0.900655040504384, 0.849692079994379, 0.793895388319592, 0.715064370596625,
0.586048018267692)), .Names = c("x", "y"), row.names = c(NA,
-20L), class = "data.frame")
structure(list(x = c(0.01, 0.0615789473684211, 0.113157894736842,
0.164736842105263, 0.216315789473684, 0.267894736842105, 0.319473684210526,
0.371052631578947, 0.422631578947368, 0.474210526315789, 0.52578947368421,
0.577368421052632, 0.628947368421053, 0.680526315789474, 0.732105263157895,
0.783684210526316, 0.835263157894737, 0.886842105263158, 0.938421052631579,
0.99), y = c(0.471958752798607, 0.703622592901373, 0.75810009343474,
0.804310552037636, 0.833980459996575, 0.858824563652134, 0.880427517647614,
0.898865451764142, 0.91097315832838, 0.926239088224158, 0.933622984262967,
0.940626455467701, 0.944787886406163, 0.940076642124325, 0.938416888413051,
0.918335717622263, 0.886320360986925, 0.84476577117581, 0.782884400269357,
0.680906832291339)), .Names = c("x", "y"), row.names = c(NA,
-20L), class = "data.frame")
structure(list(x = c(0.01, 0.0615789473684211, 0.113157894736842,
0.164736842105263, 0.216315789473684, 0.267894736842105, 0.319473684210526,
0.371052631578947, 0.422631578947368, 0.474210526315789, 0.52578947368421,
0.577368421052632, 0.628947368421053, 0.680526315789474, 0.732105263157895,
0.783684210526316, 0.835263157894737, 0.886842105263158, 0.938421052631579,
0.99), y = c(0.92758700570137, 1.01008397567002, 1.06088676091004,
1.10000024909512, 1.13048775170492, 1.15176599158524, 1.17517328999588,
1.17489937140761, 1.22188964933233, 1.2073639239554, 1.1686005354797,
1.15073969246578, 1.22855065420885, 1.30756438989448, 1.27583479809645,
1.22221301838554, 1.15463640322463, 1.08629005387455, 1.00778694924309,
0.917029835140733)), .Names = c("x", "y"), row.names = c(NA,
-20L), class = "data.frame")
这是我的spline
功能。
library(mgcv)
cspline <- function(data) {
m <- data$x
lambda <- data$y
m1 <- mgcv::gam(lambda ~ s(m, fx = FALSE, k=-1, bs = "cr"))
predm <- predict(m1, data.frame(m = seq(0.01, 0.99, length = 100000)), se=TRUE)$fit
pm <- seq(0.01, 0.99, length = 100000)
maxy <- max(predm)
maxx <- pm[which(predm == max(predm))]
return(list(maxx, maxy))
}
任何人都可以提出一种算法或想法,使我能够专注于曲线中点变化更多的部分,并仅将样条拟合到那些(并丢弃峰值两侧没有贡献的其他点更多信息)?