我正在尝试使用多元线性回归来分析一些时间序列数据及其滞后。基本上变量是一些货币汇率及其滞后1和滞后2。代码如下。我试着检查每个变量,没有任何异常..
rate = pd.read_csv('P2training.csv', header=0)
#change date format in csv
rate['Date'] = pd.to_datetime(rate['Date'], format='%Y-%m-%d')
rate.set_index('Date', inplace=True, drop=True)
lags = [1,2]
lagdata = rate
for i in lags:
tmp = rate.shift(i).copy();
lagdata = lagdata.join(tmp, rsuffix='_lag{}'.format(i));
# fit the linear regression models
collist = list(lagdata.columns);
collist.remove('AUD/USD')
collist.remove('GBP/USD')
collist.remove('CAD/USD')
collist.remove('NLG/USD')
collist.remove('FRF/USD')
collist.remove('DEM/USD')
collist.remove('JPY/USD')
collist.remove('CHF/USD')
form = 'JPY/USD' + '~' + '+'.join(collist);
lagdata.dropna(inplace=True)
model = smf.ols(formula=form, data = lagdata).fit()
使用 smf.ols 时在最后一步出现错误。一些名称错误表示某些变量未定义。
NameError Traceback (most recent call last)
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site- packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
116 try:
--> 117 return f(*args, **kwargs)
118 except Exception as e:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>()
NameError: name 'USD_lag2' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
<ipython-input-26-1985b8d39238> in <module>()
51 #print(collist)
52 #print(lagdata)
---> 53 model = smf.ols(formula=form, data = lagdata).fit()
54
55 #print(model.summary())
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site- packages\statsmodels\base\model.py in from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
153
154 tmp = handle_formula_data(data, None, formula, depth=eval_env,
--> 155 missing=missing)
156 ((endog, exog), missing_idx, design_info) = tmp
157
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\formula\formulatools.py in handle_formula_data(Y, X, formula, depth, missing)
63 if data_util._is_using_pandas(Y, None):
64 result = dmatrices(formula, Y, depth, return_type='dataframe',
---> 65 NA_action=na_action)
66 else:
67 result = dmatrices(formula, Y, depth, return_type='dataframe',
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in dmatrices(formula_like, data, eval_env, NA_action, return_type)
308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 310 NA_action, return_type)
311 if lhs.shape[1] == 0:
312 raise PatsyError("model is missing required outcome variables")
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
122 origin)
123 # Use 'exec' to hide this syntax from the Python 2 parser:
--> 124 exec("raise new_exc from e")
125 else:
126 # In python 2, we just let the original exception escape -- better
C:\Users\yaojia\AppData\Local\Continuum\Anaconda3\lib\site-packages\patsy\compat.py in <module>()
PatsyError: Error evaluating factor: NameError: name 'USD_lag2' is not defined
JPY/USD~AUD/USD_lag1+GBP/USD_lag1+CAD/USD_lag1+NLG/USD_lag1+FRF/USD_lag1+DEM/USD_lag1+JPY/USD_lag1+CHF/USD_lag1+AUD/USD_lag2+GBP/USD_lag2+CAD/USD_lag2+NLG/USD_lag2+FRF/USD_lag2+DEM/USD_lag2+JPY/USD_lag2+CHF/USD_lag2