r - 具有新因子水平的 predict.gam

Question

我正在对物种分布数据进行障碍类型分析，其中涉及两个拟合步骤。第一步是使用 family=quasibinomial 的所有数据对 (m1) 存在/不存在数据进行建模。第二步（m2）是使用family=Gamma的仅正存在数据。这非常有效，直到我尝试在完整数据集上使用第二个模型 (m2) 进行预测，但由于新的因子水平，我收到了一个错误。我了解我收到此错误的原因；完整数据集中出现的因子水平在减少（仅存在）数据集中不存在。我的问题是如何解决这个错误，以便我可以使用完整集上的第二个模型获得预测？我正在使用mgcv。

编辑：更新了额外的代码和数据。

 # Step1 - GAM using full dataset for presence/absense
grays<-structure(list(Grid_ID = structure(c(39L, 51L, 52L, 67L), .Label = c("1", 
    "1,000", "1,001", "1,008", "1,009", "1,010", "1,011", "1,012", 
    "1,013", "1,014", "1,015", "1,016", "1,022", "1,023", "1,024", 
    "1,025", "1,026", "1,027", "1,028", "1,029", "1,034", "1,035", 
    "1,036", "1,037", "1,039", "1,040", "1,045", "1,046", "1,047", 
    "1,048", "1,053", "1,054", "1,055", "10", "100", "101", "103", 
    "104", "105", "106", "107", "108", "109", "11", "110", "118", 
    "119", "12", "122", "125", "126", "127", "128", "129", "13", 
    "130", "131", "132", "133", "14", "141", "142", "15", "150", 
    "151", "152", "153", "154", "155", "156", "157", "158", "159", 
    "160", "161", "162", "163", "167", "168", "169", "173", "174", 
    "175", "176", "177", "178", "179", "180", "181", "182", "183", 
    "184", "185", "188", "189", "190", "196", "197", "198", "199", 
    "2", "20", "200", "201", "202", "203", "204", "205", "206", "207", 
    "209", "210", "211", "219", "22", "220", "221", "222", "223", 
    "224", "225", "226", "227", "228", "229", "23", "230", "231", 
    "233", "234", "235", "236", "237", "24", "246", "247", "248", 
    "249", "25", "250", "252", "253", "254", "255", "256", "257", 
    "258", "259", "26", "260", "261", "267", "268", "269", "27", 
    "270", "271", "272", "273", "274", "275", "276", "277", "278", 
    "279", "28", "280", "281", "286", "287", "288", "289", "29", 
    "290", "291", "292", "293", "294", "295", "296", "297", "298", 
    "299", "3", "300", "301", "302", "303", "305", "306", "307", 
    "308", "309", "310", "311", "312", "313", "314", "315", "316", 
    "317", "318", "319", "320", "321", "326", "327", "328", "329", 
    "330", "331", "332", "333", "334", "335", "336", "337", "339", 
    "340", "341", "343", "344", "345", "346", "347", "348", "349", 
    "350", "351", "352", "355", "356", "357", "36", "360", "361", 
    "362", "363", "364", "365", "366", "367", "368", "369", "37", 
    "372", "373", "374", "38", "380", "381", "382", "383", "384", 
    "385", "386", "39", "391", "392", "397", "398", "399", "4", "40", 
    "400", "401", "402", "408", "409", "41", "410", "412", "413", 
    "414", "415", "416", "417", "42", "423", "424", "425", "426", 
    "43", "430", "431", "432", "433", "434", "44", "441", "442", 
    "443", "444", "447", "448", "449", "45", "450", "451", "458", 
    "459", "46", "460", "461", "462", "463", "464", "465", "466", 
    "470", "471", "472", "473", "474", "475", "476", "484", "485", 
    "486", "487", "488", "489", "490", "491", "492", "496", "497", 
    "498", "499", "5", "500", "501", "513", "514", "515", "516", 
    "517", "518", "523", "524", "525", "526", "527", "528", "529", 
    "54", "541", "542", "543", "544", "545", "55", "550", "551", 
    "552", "553", "554", "56", "569", "57", "570", "571", "572", 
    "573", "574", "578", "579", "580", "581", "582", "599", "60", 
    "600", "601", "602", "603", "604", "605", "606", "607", "608", 
    "609", "61", "610", "62", "626", "627", "628", "629", "63", "632", 
    "633", "634", "635", "636", "637", "638", "639", "64", "653", 
    "654", "655", "656", "657", "658", "659", "660", "663", "664", 
    "665", "666", "667", "668", "669", "670", "671", "672", "673", 
    "687", "688", "689", "690", "691", "692", "693", "696", "697", 
    "698", "699", "7", "700", "701", "702", "703", "704", "705", 
    "716", "717", "718", "720", "721", "722", "723", "724", "725", 
    "726", "727", "728", "739", "74", "740", "741", "746", "747", 
    "748", "749", "75", "750", "751", "752", "753", "754", "764", 
    "765", "768", "769", "77", "770", "771", "772", "773", "78", 
    "782", "783", "784", "788", "789", "79", "790", "798", "799", 
    "8", "80", "800", "801", "804", "805", "81", "812", "813", "814", 
    "815", "816", "819", "82", "820", "821", "827", "828", "829", 
    "83", "830", "831", "833", "834", "835", "836", "84", "842", 
    "843", "844", "845", "846", "849", "85", "850", "851", "852", 
    "853", "854", "860", "861", "862", "863", "864", "869", "870", 
    "871", "872", "873", "874", "88", "881", "882", "883", "884", 
    "885", "886", "89", "890", "891", "892", "893", "894", "9", "902", 
    "903", "904", "905", "906", "908", "909", "910", "911", "912", 
    "922", "923", "924", "925", "926", "927", "928", "929", "930", 
    "940", "941", "942", "943", "944", "945", "946", "947", "948", 
    "957", "958", "959", "96", "960", "961", "962", "963", "964", 
    "965", "966", "97", "976", "977", "978", "979", "980", "981", 
    "982", "983", "984", "992", "993", "994", "995", "996", "997", 
    "998", "999"), class = "factor"), Grid_Lat = c(56.85582097, 56.90062505, 
    56.90024495, 56.94461032), Grid_Long = c(153.4783612, 153.4777153, 
    153.3954873, 153.3124098), Er_Pres = c(0L, 0L, 0L, 0L), Er_Count = c(0L, 
    0L, 0L, 0L), Er_Count_Density = c(0, 0, 0, 0), Month = structure(c(8L, 
    8L, 8L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", 
    "9", "10", "11"), class = "factor"), Year = structure(c(1L, 1L, 
    1L, 1L), .Label = c("1997", "1998", "1999", "2000", "2001", "2002", 
    "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
    "2011", "2012", "2013"), class = "factor"), chl = c(0.53747, 
    0.53747, 0.53747, 0.581741), SST = c(13.4171, 13.4171, 13.4171, 
    13.4025002), Bathymetry = c(76.11354065, 92.14147949, 90.60312653, 
    71.55316162), Grid_Area = c(25, 25, 25, 25), DFS = c(6.807817092, 
    4.233185446, 9.199096676, 5.153224038), Slope = c(0.13670446, 
    0.38316911, 0.08646853, 0.20038579), DOY = c(244L, 244L, 244L, 
    244L)), .Names = c("Grid_ID", "Grid_Lat", "Grid_Long", "Er_Pres", 
    "Er_Count", "Er_Count_Density", "Month", "Year", "chl", "SST", 
    "Bathymetry", "Grid_Area", "DFS", "Slope", "DOY"), row.names = c(NA, 
    4L), class = "data.frame")
    m1<-gam(Er_Pres~ s(Grid_Lat,Grid_Long,k=10,bs='tp')+Month+Year+s(SST,k=5,bs='tp'),family=quasibinomial(link='logit'),data=grays,gamma=1.4,offset(Grid_Area))

#step 2 - reduce dataset and run second GAM for positive abundance only. 
grays2<-subset(grays,Er_Pres>0)
m2<-gam(Er_Count~ Year +s(Grid_Lat,Grid_Long,k=10,bs='tp') + s(SST,k=5,bs='tp') + s(sqrt(DFS),k=5,bs='tp') + Month +log10(chl),family=Gamma(link='log'),data=grays2,Gamma=1.4,offset(Grid_Area))

运行第二个模型会给我以下错误：

Error in predict.gam(m2, newdata = full, type = "response") : 
 1997, 1998, 2006, 2007 not in original fit

score 0 · Accepted Answer

这是一篇旧帖子，所以我怀疑您现在已经找到了解决方案，但如果没有考虑这个：

如果您只想说明同一年内的数据比一年中的数据更相似，但您不一定对特定年份的影响（比如 2007 年和 1998 年之间的差异）感兴趣，那么您可以将年份指定为随机效应.

我相信有几种方法可以做到这一点，但在 mgcv 中，您可以指定：

s(Year, bs="re")

r - 具有新因子水平的 predict.gam

1 回答 1

Related

Reference