1

我正在尝试对天气数据进行缺失值填补。我的实际数据是每半小时一次,但这里我用每小时数据准备了一段可重现的代码。由于天气数据具有季节性,我首先使用 stats::ts() 创建一个时间序列,然后将其传给卡尔曼滤波器(imputeTS::na_seadec)或 forecast::na.interp,但这样代码运行非常慢;而如果不创建 ts、直接把原始数据传给卡尔曼滤波器,速度会非常快,但会丢失季节性信息。另外,我在 imputeTS::na_seadec() 中尝试了 find_frequency = TRUE,这同样使代码变得极慢(单个时间序列就需要许多个小时)。我想知道是否有办法在使用卡尔曼滤波器的同时保留季节性信息。

library(riem)
library(dplyr)
library(imputeTS)
library(forecast)
library(stats)
library(plotly)

# Fetch the measurements for station "SFO" from 2010-01-01 onward via riem.
# This call needs network access.
Raw_data <- riem_measures(station = "SFO", date_start = "2010-01-01")

# Build several gap-filled variants of the `tmpf` column so they can be
# compared side by side. The column is first wrapped in a `ts` object with
# frequency = 24, and that object is handed to the imputation routines.
# NOTE(review): `time()` applied to a non-ts vector presumably yields index
# positions, so `start` likely evaluates to 1 rather than a calendar start --
# confirm this is intended.
# NOTE(review): `na_seadecma` is computed from the raw `tmpf` vector (which
# carries no frequency), whereas the Kalman variant uses `tmpfts` -- confirm
# the difference is deliberate.
Gapfilled <- Raw_data %>%
  dplyr::mutate(
    tmpfts = ts(tmpf, start = min(time(valid)), frequency = 24),
    ts_interpFilled = as.numeric(forecast::na.interp(tmpfts)),
    na_seadecKalman = imputeTS::na_seadec(tmpfts, algorithm = "kalman"),
    na_seadecma = imputeTS::na_seadec(tmpf, algorithm = "ma"),
    # na_kalman = imputeTS::na_kalman(tmpfts, model = "auto.arima"),
    tsclean = as.numeric(forecast::tsclean(tmpfts))
  )


# Overlay the observed `tmpf` values and every gap-filled variant as line
# traces against the `valid` column, one trace per method.
plot_ly(data = Gapfilled, x = ~valid) %>%
  add_trace(
    y = ~tmpf,
    name = "Actuals",
    type = "scatter", mode = "lines"
  ) %>%
  add_trace(
    y = ~ts_interpFilled,
    name = "forecast::na.interp",
    type = "scatter", mode = "lines"
  ) %>%
  add_trace(
    y = ~na_seadecma,
    name = "imputeTS::na_seadecma",
    type = "scatter", mode = "lines"
  ) %>%
  add_trace(
    y = ~tsclean,
    name = "forecast::tsclean",
    type = "scatter", mode = "lines"
  ) %>%
  # add_trace(
  #   y = ~na_kalman,
  #   name = "imputeTS::na_kalman",
  #   type = "scatter", mode = "lines"
  # ) %>%
  add_trace(
    y = ~na_seadecKalman,
    name = "imputeTS::na_seadecKalman",
    type = "scatter", mode = "lines"
  )
4

0 回答 0