多键问题的解决方案:
在此示例中,数据的键为 [date, region, type](即日期、地区、类型),其中 date 是原始 DataFrame 的索引。
import os
import pandas as pd
# Fill nulls independently within each (region, type) group and append the
# filled rows to 'ffill_df.csv'.
#
# Sort first: forward/backward fill propagate values between neighbouring
# rows, so each group's rows must be in date order for the fill to be
# chronological.
df.sort_values(by=['date', 'region', 'type'], inplace=True)

# Collect the distinct regions and types in order of first appearance, so the
# output file is written in a reproducible order (a set has no stable order).
# NaN keys are dropped because they can never match an equality filter.
regions = df['region'].dropna().unique().tolist()
types = df['type'].dropna().unique().tolist()

# Record column names (the output file is written without a header row).
df_cols = df.columns

# Delete ffill_df.csv so we can begin anew; rows are appended below.
try:
    os.remove('ffill_df.csv')
except FileNotFoundError:
    pass

# Steps:
# 1) grab rows with a particular region and type
# 2) use forward fill to fill nulls
# 3) use backward fill to fill remaining (leading) nulls
# 4) append to file
for r in regions:
    for t in types:
        group_df = df[(df['region'] == r) & (df['type'] == t)]
        if group_df.empty:
            # This (region, type) combination does not occur in the data.
            continue
        # .ffill()/.bfill() replace the deprecated fillna(method=...) form.
        group_df = group_df.ffill().bfill()
        group_df.to_csv('ffill_df.csv', mode='a', header=False, index=True)
检查结果:
# Load the filled rows back in. 'ffill_df.csv' has no header row and its first
# field is the written-out index, so the names are supplied here: 'date' for
# the index field (assumes the original index is the date -- confirm),
# followed by the original column names recorded in df_cols.
ffill_df = pd.read_csv(
    'ffill_df.csv',
    header=None,
    names=['date', *df_cols],
    index_col='date',
)
print(ffill_df.head())

# Compare new and old dataframes: print both shapes, then the number of
# nulls remaining in each column of the filled frame.
print(df.shape)
print(ffill_df.shape)
print()
print(pd.isnull(ffill_df).sum())