python - 将 exog 变量添加到 SARIMAX 网格搜索中

Question

我按照 Jason Brownlee 关于 SARIMAX 网格搜索的优秀教程进行操作，并且在使用单变量序列进行预测时已经成功运行。但是，我现在需要添加一个外生变量。我知道要做到这一点，需要在 SARIMAX 调用中添加 exog 参数，我在主脚本中也是这样用的，但不知道如何把它纳入本教程所使用的这些函数中。

我的代码如下：

# one-step sarima forecast
def sarima_forecast(history, config):
    """Fit a SARIMAX model on ``history`` and forecast the next time step.

    Args:
        history: Observations seen so far (the endogenous series).
        config: ``(order, seasonal_order, trend, exog)``. ``exog`` must hold
            the exogenous values for the whole series (train and test),
            aligned position-by-position with the data, so it needs at least
            ``len(history) + 1`` rows. Any array-like is accepted (list,
            NumPy array, pandas Series/DataFrame).

    Returns:
        The one-step-ahead forecast for position ``len(history)``.
    """
    import numpy as np

    order, sorder, trend, exog = config
    n_obs = len(history)
    # Normalise once so positional slicing behaves the same for every
    # array-like type.
    exog_values = np.asarray(exog)
    # define model, pairing each observation with its exogenous value
    model = SARIMAX(
        history,
        exog=exog_values[:n_obs],
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    # fit model
    model_fit = model.fit(disp=False)
    # A model fitted with exogenous regressors needs their values for the
    # forecast period as well: exactly one row for a one-step forecast.
    future_exog = exog_values[n_obs:n_obs + 1]
    # make one step forecast
    yhat = model_fit.predict(n_obs, n_obs, exog=future_exog)
    return yhat[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the square root of the mean squared error of the two inputs."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split ``data`` into a leading train part and a trailing test part.

    Args:
        data: Sliceable sequence with a length (list, array, ...).
        n_test: Number of trailing items to hold out for testing.

    Returns:
        ``(train, test)`` where ``test`` is the last ``n_test`` items. If
        ``n_test`` exceeds the length of ``data``, ``train`` is empty and
        ``test`` is all of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative")
    # Slice from an explicit split index: ``data[:-n_test]`` breaks for
    # ``n_test == 0`` because ``-0`` is ``0`` and the train part ends up empty.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Score one configuration with walk-forward validation.

    The last ``n_test`` items of ``data`` are forecast one at a time; after
    each forecast the true observation is appended to the history, so the
    next model is fitted on everything seen so far.

    Returns:
        The RMSE between the held-out observations and the forecasts.
    """
    train, test = train_test_split(data, n_test)
    # the history starts as a copy of the training part and grows each step
    history = list(train)
    predictions = []
    for step in range(len(test)):
        # forecast the next value from the current history
        predictions.append(sarima_forecast(history, cfg))
        # reveal the real observation before the next iteration
        history.append(test[step])
    return measure_rmse(test, predictions)

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every configuration and return the successful ones, best first.

    Args:
        data: Series passed through to ``score_model``.
        cfg_list: Iterable of configurations to evaluate.
        n_test: Number of trailing observations used for validation.
        parallel: When true, configurations are scored in parallel, one
            job per CPU.

    Returns:
        List of ``(key, error)`` tuples sorted by ascending error.
        Configurations whose error is ``None`` are dropped, so the list is
        empty when every configuration failed.
    """
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results; identity check, since ``!= None`` can be
    # overridden by the value's type
    scores = [r for r in scores if r[1] is not None]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Evaluate one configuration and report its error.

    Args:
        data: Series passed through to ``walk_forward_validation``.
        n_test: Number of trailing observations used for validation.
        cfg: Configuration to evaluate; ``str(cfg)`` is used as its key.
        debug: When true, warnings are shown and any exception propagates.
            When false, warnings are silenced and an exception yields a
            ``None`` result.

    Returns:
        ``(key, result)`` where ``result`` is the validation error, or
        ``None`` if the evaluation failed.
    """
    # convert config to a key
    key = str(cfg)
    result = None
    if debug:
        # show all warnings and fail on exception if debugging
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # Only ordinary errors mark the config as failed;
            # KeyboardInterrupt / SystemExit still propagate.
            result = None
    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

# create a set of sarima configs to try
def sarima_configs(exog, seasonal=(0,)):
    """Build the full grid of SARIMA configurations.

    Args:
        exog: Exogenous data; the same object is stored in every
            configuration.
        seasonal: Iterable of seasonal periods ``m`` to try.

    Returns:
        List of ``[(p, d, q), (P, D, Q, m), trend, exog]`` entries, ordered
        with ``p`` varying slowest and ``m`` varying fastest.
    """
    from itertools import product

    # define config lists
    p_params = [0, 1, 2]
    d_params = [0, 1]
    q_params = [0, 1, 2]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]
    # create config instances; product() iterates its last argument fastest,
    # matching the order of the equivalent nested loops
    grid = product(
        p_params, d_params, q_params, t_params,
        P_params, D_params, Q_params, seasonal,
    )
    return [
        [(p, d, q), (P, D, Q, m), t, exog]
        for p, d, q, t, P, D, Q, m in grid
    ]
# series to model and the exogenous values that go with it
# (difference, high_f and n_test are defined outside this excerpt)
data = difference.values
exog = high_f.values

# candidate configurations for seasonal periods 0 and 4
cfg_list = sarima_configs(exog, seasonal=[0, 4])

# evaluate every configuration
scores = grid_search(data, cfg_list, n_test)
print('done')

# show the three best configurations
for cfg, error in scores[:3]:
    print(cfg, error)

# keep all results in a table
sarimax_scores = pd.DataFrame(scores, columns=['config', 'error'])

以上只是代码摘录，如果遗漏了核心代码中的任何内容，敬请见谅。我尝试过多种方式来加入外生变量，包括给其中几个函数增加额外的参数，但由于这些函数是层层嵌套调用的，似乎没有一种方式奏效，所以我决定把它作为配置（config）的一部分传入。

作为参考，data和exog都是长度为 31 的数组。

目前 scores 返回的是空列表 []，这意味着我无法在主模型中使用它。非常感谢任何帮助。

score 0 · Accepted Answer

我意识到还需要在预测（predict）调用中同样传入外生变量，修改后代码可以正常运行了：

# one-step sarima forecast
def sarima_forecast(history, config):
    """Fit a SARIMAX model on ``history`` and forecast the next time step.

    Args:
        history: Observations seen so far (the endogenous series).
        config: ``(order, seasonal_order, trend, exog)``. ``exog`` must hold
            the exogenous values for the whole series (train and test),
            aligned position-by-position with the data, so it needs at least
            ``len(history) + 1`` rows. Any array-like is accepted (list,
            NumPy array, pandas Series/DataFrame).

    Returns:
        The one-step-ahead forecast for position ``len(history)``.
    """
    import numpy as np

    order, sorder, trend, exog = config
    n_obs = len(history)
    # Normalise once so positional slicing behaves the same for every
    # array-like type.
    exog_values = np.asarray(exog)
    # define model, pairing each observation with its exogenous value
    model = SARIMAX(
        history,
        exog=exog_values[:n_obs],
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    # fit model
    model_fit = model.fit(disp=False)
    # The forecast needs the exogenous values of the forecast period only:
    # exactly one row (the one right after the history), not the whole
    # prefix of the series.
    future_exog = exog_values[n_obs:n_obs + 1]
    # make one step forecast
    yhat = model_fit.predict(n_obs, n_obs, exog=future_exog)
    return yhat[0]

值得注意的是，我还需要把 exog 改回 pandas 的 DataFrame（df），而不是 NumPy 数组。

python - 将 exog 变量添加到 SARIMAX 网格搜索中

1 回答 1

Related

Reference