尝试这个:
# Keep only the rows whose C value is missing.
x = df[pd.isnull(df.C)]
# Positional labels (0-based, within x) of the rows where `id` jumps by more
# than 1, i.e. the first row of every new run of consecutive ids.
splitter = x.reset_index()[(x['id'].diff().fillna(0) > 1).reset_index(drop=True)].index
# Cut x at those positions. Plain positional slicing is used instead of
# np.split(x, splitter): np.split on a DataFrame goes through
# DataFrame.swapaxes, which is deprecated in recent pandas releases.
bounds = [0] + list(splitter) + [len(x)]
dfs = [x.iloc[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])]
# A distinct loop name keeps the filtered frame `x` intact after the loop.
for part in dfs:
    print(part, '\n')
输出:
In [264]: for x in dfs:
.....: print(x, '\n')
.....:
id A B C
4 5 87699 475132 NaN
5 6 52734 4298894 NaN
id A B C
7 8 89872 18103 NaN
id A B C
11 12 206 2918137 NaN
12 13 554 3918072 NaN
id A B C
14 15 2349243 4918064 NaN
解释:
In [267]: x = df[pd.isnull(df.C)]
In [268]: x
Out[268]:
id A B C
4 5 87699 475132 NaN
5 6 52734 4298894 NaN
7 8 89872 18103 NaN
11 12 206 2918137 NaN
12 13 554 3918072 NaN
14 15 2349243 4918064 NaN
In [269]: x.loc[pd.isnull(df.C), 'id']
Out[269]:
4 5
5 6
7 8
11 12
12 13
14 15
Name: id, dtype: int64
In [270]: x['id'].diff().fillna(0)
Out[270]:
4 0.0
5 1.0
7 2.0
11 4.0
12 1.0
14 2.0
Name: id, dtype: float64
In [271]: x['id'].diff().fillna(0) > 1
Out[271]:
4 False
5 False
7 True
11 True
12 False
14 True
Name: id, dtype: bool
In [272]: (x['id'].diff().fillna(0) > 1).reset_index(drop=True)
Out[272]:
0 False
1 False
2 True
3 True
4 False
5 True
Name: id, dtype: bool
In [273]: x.reset_index()[(x['id'].diff().fillna(0) > 1).reset_index(drop=True)]
Out[273]:
index id A B C
2 7 8 89872 18103 NaN
3 11 12 206 2918137 NaN
5 14 15 2349243 4918064 NaN
In [274]: x.reset_index()[(x['id'].diff().fillna(0) > 1).reset_index(drop=True)].index
Out[274]: Int64Index([2, 3, 5], dtype='int64')