我知道我可以通过执行以下操作来应用 numpy 方法:
其中 `dataList` 是由多个 `DataFrame`(行、列均相同)组成的列表。
# Join the frames column-wise (one key per frame), swap the two column levels
# so the original column label becomes the outer level, sort the columns, and
# group them by that outer level.
testDF = (concat(dataList, axis=1, keys=range(len(dataList)))
.swaplevel(0, 1, axis=1)
.sortlevel(axis=1)
.groupby(level=0, axis=1))
# NumPy reducers passed to aggregate on the grouped object; the surrounding
# text reports that these calls work.
testDF.aggregate(numpy.mean)
testDF.aggregate(numpy.var)
等等。但是,如果我想计算均值的标准误差(standard error of the mean,简称 sem)该怎么办?
我试过:
testDF.aggregate(scipy.stats.sem)
但它给出了一个令人困惑的错误。有人知道该怎么做吗?scipy.stats 的方法与 numpy 的方法有什么不同?
下面这段代码可以在我的环境中重现该错误:
from scipy import stats as st
import pandas
import numpy as np
# Build 30 frames of random values, each 600 rows by 10 columns labelled
# 'A' through 'J'.
df_list = []
for ii in range(30):
df_list.append(pandas.DataFrame(np.random.rand(600, 10),
columns = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']))
# Concatenate the frames side by side keyed by their list position, swap the
# two column levels so the letter label is the outer level, sort the columns,
# and group them by that outer level.
testDF = (pandas.concat(df_list, axis=1, keys=range(len(df_list)))
.swaplevel(0, 1, axis=1)
.sortlevel(axis=1)
.groupby(level=0, axis=1))
# This is the call that raises the AssertionError shown in the traceback.
testDF.aggregate(st.sem)
这是错误消息:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-1-184cee8fb2ce> in <module>()
12 .groupby(level=0, axis=1))
13
---> 14 testDF.aggregate(st.sem)
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
1177 return self._python_agg_general(arg, *args, **kwargs)
1178 else:
-> 1179 result = self._aggregate_generic(arg, *args, **kwargs)
1180
1181 if not self.as_index:
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/groupby.py in _aggregate_generic(self, func, *args, **kwargs)
1248 else:
1249 result = DataFrame(result, index=obj.index,
-> 1250 columns=result_index)
1251 else:
1252 result = DataFrame(result)
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
300 mgr = self._init_mgr(data, index, columns, dtype=dtype, copy=copy)
301 elif isinstance(data, dict):
--> 302 mgr = self._init_dict(data, index, columns, dtype=dtype)
303 elif isinstance(data, ma.MaskedArray):
304 mask = ma.getmaskarray(data)
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
389
390 # consolidate for now
--> 391 mgr = BlockManager(blocks, axes)
392 return mgr.consolidate()
393
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/internals.py in __init__(self, blocks, axes, do_integrity_check)
329
330 if do_integrity_check:
--> 331 self._verify_integrity()
332
333 def __nonzero__(self):
/Library/Frameworks/EPD64.framework/Versions/7.3/lib/python2.7/site-packages/pandas/core/internals.py in _verify_integrity(self)
404 mgr_shape = self.shape
405 for block in self.blocks:
--> 406 assert(block.values.shape[1:] == mgr_shape[1:])
407 tot_items = sum(len(x.items) for x in self.blocks)
408 assert(len(self.items) == tot_items)
AssertionError: