当我尝试使用 window='1M' 时,我发现 user2689410 代码中断了,因为营业月的增量抛出了这个错误:
AttributeError: 'MonthEnd' object has no attribute 'delta'
我添加了直接传递相对时间增量的选项,因此您可以为用户定义的时间段执行类似的操作。
感谢您的指点,这是我的尝试-希望它有用。
def rolling_mean(data, window, min_periods=1, center=False):
""" Function that computes a rolling mean
Reference:
http://stackoverflow.com/questions/15771472/pandas-rolling-mean-by-time-interval
Parameters
----------
data : DataFrame or Series
If a DataFrame is passed, the rolling_mean is computed for all columns.
window : int, string, Timedelta or Relativedelta
int - number of observations used for calculating the statistic,
as defined by the function pd.rolling_mean()
string - must be a frequency string, e.g. '90S'. This is
internally converted into a DateOffset object, and then
Timedelta representing the window size.
Timedelta / Relativedelta - Can directly pass a timedeltas.
min_periods : int
Minimum number of observations in window required to have a value.
center : bool
Point around which to 'center' the slicing.
Returns
-------
Series or DataFrame, if more than one column
"""
def f(x, time_increment):
"""Function to apply that actually computes the rolling mean
:param x:
:return:
"""
if not center:
# adding a microsecond because when slicing with labels start
# and endpoint are inclusive
start_date = x - time_increment + timedelta(0, 0, 1)
end_date = x
else:
start_date = x - time_increment/2 + timedelta(0, 0, 1)
end_date = x + time_increment/2
# Select the date index from the
dslice = col[start_date:end_date]
if dslice.size < min_periods:
return np.nan
else:
return dslice.mean()
data = DataFrame(data.copy())
dfout = DataFrame()
if isinstance(window, int):
dfout = pd.rolling_mean(data, window, min_periods=min_periods, center=center)
elif isinstance(window, basestring):
time_delta = pd.datetools.to_offset(window).delta
idx = Series(data.index.to_pydatetime(), index=data.index)
for colname, col in data.iteritems():
result = idx.apply(lambda x: f(x, time_delta))
result.name = colname
dfout = dfout.join(result, how='outer')
elif isinstance(window, (timedelta, relativedelta)):
time_delta = window
idx = Series(data.index.to_pydatetime(), index=data.index)
for colname, col in data.iteritems():
result = idx.apply(lambda x: f(x, time_delta))
result.name = colname
dfout = dfout.join(result, how='outer')
if dfout.columns.size == 1:
dfout = dfout.ix[:, 0]
return dfout
以及具有 3 天时间窗口来计算平均值的示例:
from pandas import Series, DataFrame
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from dateutil.relativedelta import relativedelta
idx = [datetime(2011, 2, 7, 0, 0),
datetime(2011, 2, 7, 0, 1),
datetime(2011, 2, 8, 0, 1, 30),
datetime(2011, 2, 9, 0, 2),
datetime(2011, 2, 10, 0, 4),
datetime(2011, 2, 11, 0, 5),
datetime(2011, 2, 12, 0, 5, 10),
datetime(2011, 2, 12, 0, 6),
datetime(2011, 2, 13, 0, 8),
datetime(2011, 2, 14, 0, 9)]
idx = pd.Index(idx)
vals = np.arange(len(idx)).astype(float)
s = Series(vals, index=idx)
# Now try by passing the 3 days as a relative time delta directly.
rm = rolling_mean(s, window=relativedelta(days=3))
>>> rm
Out[2]:
2011-02-07 00:00:00 0.0
2011-02-07 00:01:00 0.5
2011-02-08 00:01:30 1.0
2011-02-09 00:02:00 1.5
2011-02-10 00:04:00 3.0
2011-02-11 00:05:00 4.0
2011-02-12 00:05:10 5.0
2011-02-12 00:06:00 5.5
2011-02-13 00:08:00 6.5
2011-02-14 00:09:00 7.5
Name: 0, dtype: float64