我遵循了 Jason Brownlee 关于 SARIMAX 网格搜索的优秀教程,并在使用单变量系列进行预测时使其工作。但是,我需要添加一个外生变量。我知道要做到这一点,您需要在 SARIMAX 调用中添加一个 exog 参数,并在我的主脚本中使用它,但无法弄清楚如何将它包含在本教程中使用的函数范围中。
我的代码如下:
# one-step sarima forecast
def sarima_forecast(history, config):
order, sorder, trend, exog = config
# define model
model = SARIMAX(history, exog=exog[:len(history)].tolist(), order=order, seasonal_order=sorder, trend=trend, enforce_stationarity=False, enforce_invertibility=False)
# fit model
model_fit = model.fit(disp=False)
# make one step forecast
yhat = model_fit.predict(len(history), len(history))
return yhat[0]
# root mean squared error or rmse
def measure_rmse(actual, predicted):
return sqrt(mean_squared_error(actual, predicted))
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
return data[:-n_test], data[-n_test:]
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
predictions = list()
# split dataset
train, test = train_test_split(data, n_test)
# seed history with training dataset
history = [x for x in train]
# step over each time-step in the test set
for i in range(len(test)):
# fit model and make forecast for history
yhat = sarima_forecast(history, cfg)
# store forecast in list of predictions
predictions.append(yhat)
# add actual observation to history for the next loop
history.append(test[i])
# estimate prediction error
error = measure_rmse(test, predictions)
return error
# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
scores = None
if parallel:
# execute configs in parallel
executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
scores = executor(tasks)
else:
scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
# remove empty results
scores = [r for r in scores if r[1] != None]
# sort configs by error, asc
scores.sort(key=lambda tup: tup[1])
return scores
# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
result = None
# convert config to a key
key = str(cfg)
# show all warnings and fail on exception if debugging
if debug:
result = walk_forward_validation(data, n_test, cfg)
else:
# one failure during model validation suggests an unstable config
try:
# never show warnings when grid searching, too noisy
with catch_warnings():
filterwarnings("ignore")
result = walk_forward_validation(data, n_test, cfg)
except:
error = None
# check for an interesting result
if result is not None:
print(' > Model[%s] %.3f' % (key, result))
return (key, result)
# create a set of sarima configs to try
def sarima_configs(exog, seasonal=[0]):
models = list()
# define config lists
e = exog
p_params = [0, 1, 2]
d_params = [0, 1]
q_params = [0, 1, 2]
t_params = ['n','c','t','ct']
P_params = [0, 1, 2]
D_params = [0, 1]
Q_params = [0, 1, 2]
m_params = seasonal
# create config instances
for p in p_params:
for d in d_params:
for q in q_params:
for t in t_params:
for P in P_params:
for D in D_params:
for Q in Q_params:
for m in m_params:
cfg = [(p,d,q), (P,D,Q,m), t, e]
models.append(cfg)
return models
data = difference.values
exog = high_f.values
cfg_list = sarima_configs(exog, seasonal=[0,4])
scores = grid_search(data, cfg_list, n_test)
print('done')
for cfg, error in scores[:3]:
print(cfg, error)
sarimax_scores = pd.DataFrame(list(scores), columns=['config','error'])
以上是摘录,如果我在核心代码中遗漏了任何内容,敬请见谅。我尝试了各种添加它们的方法,包括为几个函数添加额外的参数,但由于它们都是嵌套的,似乎没有一个起作用,所以我决定将它添加为配置的一部分。
作为参考,data
和exog
都是长度为 31 的数组。
目前scores
返回为[]
,这意味着我不能在我的主模型中使用它。任何帮助将非常感激。