0

我正在尝试编写一个执行以下操作的函数: 1. 生成和 x,y 散点图 2. 添加黄土曲线 3. 添加基于 AIC 模型拟合过程的曲线,其中最佳拟合模型可以是线性、二次或立方体。我只想为这一步画一条线(即最合适的,而不是所有 3 种可能性)。

我可以做第 1 步和第 2 步,但不能让#3 工作。我哪里错了?下面的示例数据,但我将在各种数据集上运行此函数,其中一些数据集的长度和 NA 会有所不同。

maindata = structure(c(1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 
1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 
1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 
1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 
1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, -1.54, 
1.5, 3.26, -2.79, 0.54, -0.51, -2.12, 1.83, -1.88, 0.47, -1.05, 
-2.16, -1.04, -1.77, -2.54, 1.67, -2.97, -2.58, NA, -0.08, 
2.05, 0.27, 2.18, 0.01, NA, -2.08, -0.42, -0.23, -1.58, -0.55, 
2.63, 0.38, 2.17, -3.09, 3.14, -3.01, -0.13, 2.38, 3.88, 1.14, 
2.54, 1.71, 2.86, -1.11, -1.98, -0.93, 1.03, 2.25, 1.18, -1.91, 
1, -0.09, 0.7, -1.35, -0.2, 1.35, 1.72, 0.72, -5.96, 2.95, -0.25, 
NA, 47, 40, NA, 20, 70, 80, 30, 33, 40, 71, 63, 25, 66, 41, 25, 
38, 18, 22, 60, 85, 30, 75, 25, 80, 65, 33, 85, 95, 45, 75, 19, 
75, 27, 13, 14, 15, 99, 22, 10, NA, 20, 35, 17, 55, 35, 70, 47, 
24, 45, 38, 50, 90, 60, 50, 100, 42, 34, 55, 10, 15, 90, NA), .Dim = c(62L, 
3L), .Dimnames = list(NULL, c("year", "x_values", "y_values")))


dd_plot = function(x, y, yaxis, xaxis) {
  subset.data = subset(maindata, x!="NA" & y!="NA"); 
  plot(subset.data$y~subset.data$x, pch = 20, ylab = yaxis, xlab=xaxis)
    lines(loess.smooth(subset.data$x, subset.data$y), col = "blue", lwd = 2, lty =2)
  fit = stepAIC(lm(subset.data$y~subset.data$x+I(subset.data$x^2)+I(subset.data$x^3)))
  lines(subset.data$x, predict(fit), col="red")
    legend("topleft", c("Lowess","Best Fit Polynomial"), lty = c(2,1), col=c("blue","red"), bty="n", xjust = -0.2)
}

dd_plot(y = y_values, x = x_values, yaxis = "Y_label", xaxis = "X_label")
4

2 回答 2

1

好吧,您并没有准确描述哪个部分“不起作用”或者它与您的期望不符。你的样本有一堆不匹配的变量名,也许你的真实数据没有,它正在寻找xymaindata 中实际上有x_valuesand y_values。然后你调用dd_plot引用变量x_values并且y_values不存在。

但是我假设您已经过去了,他们谈论的是绘制的弦球,而不是最佳拟合多项式的简单曲线。问题是您的数据在绘制线条时应该按 x 值排序。R 只是连接连续的点。所以我想你想要

lines(subset.data$x[order(subset.data$x)], 
    predict(fit)[order(subset.data$x)], col="red")

而不是绘制未排序的数据。那返回

在此处输入图像描述

于 2014-07-11T23:02:40.350 回答
1

这里有几个不同的问题。从风格上讲,您通常希望使用数据框而不是您生成的“结构”。第二个问题是您将函数内部的数据与外部数据混合在一起。这并不是完全错误的,但它通常会使解决问题变得更加困难。

更改这些解决了问题:

maindata <-
  data.frame(year=c(1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 
               1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 
               1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 
               1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 
               1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 
               2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013),
             x_values=c(-1.54, 1.5, 3.26, -2.79, 0.54, -0.51, -2.12, 1.83, -1.88, 0.47, -1.05, 
               -2.16, -1.04, -1.77, -2.54, 1.67, -2.97, -2.58, NA, -0.08, 
               2.05, 0.27, 2.18, 0.01, NA, -2.08, -0.42, -0.23, -1.58, -0.55, 
               2.63, 0.38, 2.17, -3.09, 3.14, -3.01, -0.13, 2.38, 3.88, 1.14, 
               2.54, 1.71, 2.86, -1.11, -1.98, -0.93, 1.03, 2.25, 1.18, -1.91, 
               1, -0.09, 0.7, -1.35, -0.2, 1.35, 1.72, 0.72, -5.96, 2.95, -0.25, 
               NA),
             y_values=c(47, 40, NA, 20, 70, 80, 30, 33, 40, 71, 63, 25, 66, 41, 25, 
               38, 18, 22, 60, 85, 30, 75, 25, 80, 65, 33, 85, 95, 45, 75, 19, 
               75, 27, 13, 14, 15, 99, 22, 10, NA, 20, 35, 17, 55, 35, 70, 47, 
               24, 45, 38, 50, 90, 60, 50, 100, 42, 34, 55, 10, 15, 90, NA))

dd_plot = function(x, y, ...) {
  subset.data = subset(data.frame(x, y), !is.na(x) & !is.na(y)); 
  plot(subset.data$y~subset.data$x,
       pch = 20, ...)
  lines(loess.smooth(subset.data$x, subset.data$y),
        col = "blue", lwd = 2, lty = 2)
  fit = stepAIC(lm(subset.data$y~subset.data$x+I(subset.data$x^2)+I(subset.data$x^3)))
  lines(subset.data$x, predict(fit), col="red")
  legend("topleft", c("Loess","Best Fit Polynomial"),
         lty = c(2,1), col=c("blue","red"), bty="n", xjust = -0.2)
}

dd_plot(y = maindata$y_values, x = maindata$x_values, ylab = "Y_label", xlab = "X_label")
于 2014-07-12T01:29:26.873 回答