2

I have a code that was given to me that runs an ARIMA model putting weight on more recent errors, it gives excellent results, much better than simple ARIMA, but i do not understand the methodology behind it. If you can understand whats going on and why and how it works then i would really appreciate it :)

The code that i would like explaining is from the #---Weighting---

suppressMessages(library(lmtest))
suppressMessages(library(tseries))
suppressMessages(library(forecast))
suppressMessages(library(TTR))
#-------------------------------------------------------------------------------
Input.data <- matrix(c("8Q1","8Q2","8Q3","8Q4","9Q1","9Q2","9Q3","9Q4","10Q1","10Q2","10Q3","10Q4","11Q1","11Q2","11Q3","11Q4","12Q1","12Q2","12Q3","12Q4","13Q1","13Q2","13Q3","13Q4","14Q1","14Q2","14Q3",5403.675741,6773.504993,7231.117289,7835.55156,5236.709983,5526.619467,6555.781711,11464.72728,7210.068674,7501.610403,8670.903486,10872.93518,8209.022658,8153.393088,10196.44775,13244.50201,8356.732878,10188.44157,10601.32205,12617.82102,11786.52641,10044.98676,11006.0051,15101.9456,10992.27282,11421.18922,10731.31198),ncol=2,byrow=FALSE)

#-------------------------------------------------------------------------------
# Maximum seasonal differences allowed. For typical series, 0 is recommended.

max.sdiff <- 2 

#-------------------------------------------------------------------------------
# Force seasonality

arima.force.seasonality <- "y"

#-------------------------------------------------------------------------------
# The frequency of the data. 1/4 for QUARTERLY, 1/12 for MONTHLY

Frequency <- 1/4

#-------------------------------------------------------------------------------
# How many quarters/months to forecast

Forecast.horizon <- 4

#-------------------------------------------------------------------------------
# The first date in the series. Use c(8, 1) to denote 2008 q1

Start.date <- c(8, 1)

#-------------------------------------------------------------------------------
# The dates of the forecasts

Forecast.dates <- c("14Q4", "15Q1", "15Q2", "15Q3")

#-------------------------------------------------------------------------------
# Set if the data should be logged. Takes value "s" (lets script choose logging)
#"level" (forces levels) or "log" (forces logs)

force.log <- "s"

#-------------------------------------------------------------------------------
# Selects the data column from Input.data

Data.col <- as.numeric(Input.data[, length(Input.data[1, ])])

#-------------------------------------------------------------------------------
# Turns the Data.col into a time-series

Data.col.ts <- ts(Data.col, deltat=Frequency, start = Start.date)

#-------------------------------------------------------------------------------
# A character vector of the dates from Input.data

Dates.col <- as.character(Input.data[,1])

#-------------------------------------------------------------------------------
# Starts the testing to see if the data should be logged

transform.method <- round(BoxCox.lambda(Data.col.ts, method = "loglik"), 5)

log.values <- seq(0, 0.24999, by = 0.00001)
sqrt.values <- seq(0.25, 0.74999, by = 0.00001)

which.transform.log <- transform.method %in% log.values
which.transform.sqrt <- transform.method %in% sqrt.values

if (which.transform.log == "TRUE"){
  as.log <- "log"
  Data.new <- log(Data.col.ts)
} else {
  if (which.transform.sqrt == "TRUE"){
    as.log <- "sqrt"
    Data.new <- sqrt(Data.col.ts)
  } else {
    as.log <- "no"
    Data.new <- Data.col.ts
  }
}

#----- Weighting ---------------------------------------------------------------
fweight <- function(x){
  PatX <- 0.5+x 
  return(PatX)
}

integ1 <- integrate(fweight, lower = 0.00, upper = 1)

valinteg <- 2*integ1$value 

#Split the integral to several intervals, and pick the weights accordingly

integvals <- rep(0, length.out = length(Data.new))
for (i in 1:length(Data.new)){
  integi <- integrate(fweight, lower = (i-1)/length(Data.new), upper= i/length(Data.new))
  integvals[i] <- 2*integi$value
}

suppressWarnings(kpssW <- kpss.test(Data.new, null="Level"))

suppressWarnings(ppW <- tryCatch({
  ppW <- pp.test(Data.new, alternative = "stationary")},
  error = function(ppW){
    ppW <- list(error = "TRUE", p.value = 0.99)
    }))

suppressWarnings(adfW <- adf.test(Data.new, alternative = "stationary",
                                  k = trunc((length(Data.new) - 1)^(1/3))))

suppressWarnings(if (kpssW$p.value < 0.05 |
                       ppW$p.value > 0.05 |
                       adfW$p.value > 0.05){
  ndiffsW = 1
  } else {
    ndiffsW = 0
    })

aaw <- auto.arima(Data.new,
                  max.D         = max.sdiff,
                  d             = ndiffsW,
                  seasonal      = TRUE,
                  allowdrift    = FALSE,
                  stepwise      = FALSE,
                  trace         = FALSE,
                  seasonal.test = "ch")

order.arima <- c(aaw$arma[1], aaw$arma[6] , aaw$arma[2])

order.seasonal.arima <- c(aaw$arma[3], aaw$arma[7], aaw$arma[4])

if (sum(aaw$arma[1:2]) == 0){
  order.arima[1] <- 1
  } else {
    NULL
  }

if (arima.force.seasonality == "y"){
  if(sum(aaw$arma[3:4]) == 0){
    order.seasonal.arima[1] <- 1
    } else {
      NULL
    }
  } else {
    NULL
  }

#----- ARIMA -------------------------------------------------------------------
# Fits an ARIMA model with the orders set
stAW <- Arima(Data.new,
              order    = order.arima,
              seasonal = list(order = order.seasonal.arima),
              method   ="ML")

parSW <- stAW$coef

WMAEOPT <- function(parSW){
  ArimaW <- Arima(Data.new,
                  order         = order.arima,
                  seasonal      = list(order = order.seasonal.arima), 
                  include.drift = FALSE,
                  method        = "ML",
                  fixed         = c(parSW))
  errAR <- c(abs(resid(ArimaW)))
  WMAE <- t(errAR) %*% integvals 
  return(WMAE)
}

OPTWMAE <- optim(parSW,
                 WMAEOPT,
                 method = "SANN",
                 set.seed(2),
                 control = list(fnscale = 1, maxit = 5000))

parS3 <- OPTWMAE$par

Arima.Data.new <- Arima(Data.new, order = order.arima, seasonal=list(order=order.seasonal.arima), 
                        include.drift=FALSE, method = "ML", fixed = c(parS3))
4

0 回答 0