我正在尝试在 python 中使用 arima 模型。我尝试使用下面的 statsmodel 和 pandas 的代码。但我在下面收到“ValueError: Given a pandas object and the index does not contain dates”错误。我发现另一篇文章提到了同样的错误,他们使用 as_matrix 将数据帧更改为矩阵,我尝试过,但它没有解决我的问题。我刚收到一个新的 numpy 数组错误。任何提示将不胜感激。我在 r 中有更多的经验,但是 r 在处理时间序列时也有很多令人头疼的问题。
样本数据描述:
y.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4347 entries, 0 to 4346
Data columns (total 2 columns):
lastSaleDate 4347 non-null datetime64[ns]
lastSaleAmount 4347 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 68.0 KB
Sample Data:
lastSaleDate lastSaleAmount
1 1997-08-13 102900.000000
2 1997-08-14 84833.333333
3 1997-08-15 26171.000000
4 1997-08-19 167000.000000
5 1997-08-20 107500.000000
6 1997-08-21 175000.000000
7 1997-08-25 56000.000000
8 1997-08-26 74300.000000
9 1997-08-28 118500.000000
10 1997-08-29 305333.333333
11 1997-09-02 50000.000000
12 1997-09-03 116950.000000
13 1997-09-04 58000.000000
14 1997-09-08 72250.000000
15 1997-09-10 60000.000000
16 1997-09-12 75000.000000
17 1997-09-15 57700.000000
18 1997-09-18 21000.000000
19 1997-09-19 47500.000000
20 1997-09-22 49900.000000
21 1997-09-23 81750.000000
22 1997-09-25 57000.000000
23 1997-09-26 69000.000000
24 1997-09-29 396000.000000
25 1997-09-30 213500.000000
26 1997-10-01 152500.000000
27 1997-10-02 99666.666667
28 1997-10-03 83000.000000
29 1997-10-06 84500.000000
30 1997-10-07 157000.000000
31 1997-10-15 272000.000000
32 1997-10-16 45900.000000
33 1997-10-17 68500.000000
34 1997-10-20 108000.000000
35 1997-10-21 66800.000000
36 1997-10-22 115633.333333
37 1997-10-24 62015.000000
38 1997-10-27 113000.000000
39 1997-10-28 41000.000000
40 1997-10-31 114950.000000
41 1997-11-03 71000.000000
42 1997-11-07 20500.000000
43 1997-11-13 68500.000000
44 1997-11-17 65000.000000
45 1997-11-18 94900.000000
46 1997-11-19 89900.000000
47 1997-11-21 213833.333333
48 1997-11-24 63300.000000
49 1997-11-25 227825.000000
代码:
lag=y['lastSaleAmount'].shift()
mod = sm.tsa.ARIMA(endog=y['lastSaleAmount'],exog=lag,order=[1,1,0])
results = mod.fit()
print(results.summary().tables[1])
错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-61-0120f3323d31> in <module>()
11 #y=y.as_matrix()
12
---> 13 mod = sm.tsa.ARIMA(endog=y['lastSaleAmount'],exog=lag,order=[1,1,0])
14
15 results = mod.fit()
/Users/username/anaconda/lib/python2.7/site-packages/statsmodels/tsa/arima_model.pyc in __new__(cls, endog, order, exog, dates, freq, missing)
963 else:
964 mod = super(ARIMA, cls).__new__(cls)
--> 965 mod.__init__(endog, order, exog, dates, freq, missing)
966 return mod
967
/Users/username/anaconda/lib/python2.7/site-packages/statsmodels/tsa/arima_model.pyc in __init__(self, endog, order, exog, dates, freq, missing)
973 # in the predict method
974 raise ValueError("d > 2 is not supported")
--> 975 super(ARIMA, self).__init__(endog, (p, q), exog, dates, freq, missing)
976 self.k_diff = d
977 self._first_unintegrate = unintegrate_levels(self.endog[:d], d)
/Users/username/anaconda/lib/python2.7/site-packages/statsmodels/tsa/arima_model.pyc in __init__(self, endog, order, exog, dates, freq, missing)
443 def __init__(self, endog, order, exog=None, dates=None, freq=None,
444 missing='none'):
--> 445 super(ARMA, self).__init__(endog, exog, dates, freq, missing=missing)
446 exog = self.data.exog # get it after it's gone through processing
447 _check_estimable(len(self.endog), sum(order))
/Users/username/anaconda/lib/python2.7/site-packages/statsmodels/tsa/base/tsa_model.pyc in __init__(self, endog, exog, dates, freq, missing)
40 def __init__(self, endog, exog=None, dates=None, freq=None, missing='none'):
41 super(TimeSeriesModel, self).__init__(endog, exog, missing=missing)
---> 42 self._init_dates(dates, freq)
43
44 def _init_dates(self, dates, freq):
/Users/username/anaconda/lib/python2.7/site-packages/statsmodels/tsa/base/tsa_model.pyc in _init_dates(self, dates, freq)
49 if (not datetools._is_datetime_index(dates) and
50 isinstance(self.data, data.PandasData)):
---> 51 raise ValueError("Given a pandas object and the index does "
52 "not contain dates")
53 if not freq:
ValueError: Given a pandas object and the index does not contain dates