如何对数据进行分箱(bin)
import pandas as pd
import numpy as np # for test data
import random # for test data
# Sample data: 1100 consecutive hourly timestamps (about 1.5 months) and a
# random integer in [0, 10) for each one. Both RNGs are seeded so the frame
# is reproducible.
random.seed(365)
np.random.seed(365)
data = {
    # the explicit hourly freq replaces bdate_range's business-day default
    'date': pd.bdate_range('2020-09-21', freq='h', periods=1100).tolist(),
    'x': np.random.randint(10, size=(1100)),
}
df = pd.DataFrame(data)

# 'date' is already a datetime column here. If it were not (e.g. loaded as
# strings), convert it first with pd.to_datetime(df['date']) so that the
# .dt accessor used below is available.

# Human-readable labels, one per six-hour window of the day.
labels = ['00:00-05:59', '06:00-11:59', '12:00-17:59', '18:00-23:59']
# Edges of the hour-of-day windows; there is one more edge than labels.
bins = [0, 6, 12, 18, 24]

# right=False makes each window closed on the left and open on the right,
# i.e. [0, 6), [6, 12), ... so hour 6 falls in '06:00-11:59'.
df['Time Bin'] = pd.cut(df['date'].dt.hour, bins=bins, labels=labels, right=False)
# display(df.head())
date x Time Bin
0 2020-09-21 00:00:00 2 00:00-05:59
1 2020-09-21 01:00:00 4 00:00-05:59
2 2020-09-21 02:00:00 1 00:00-05:59
3 2020-09-21 03:00:00 5 00:00-05:59
4 2020-09-21 04:00:00 2 00:00-05:59
# display(df.tail())
date x Time Bin
1095 2020-11-05 15:00:00 2 12:00-17:59
1096 2020-11-05 16:00:00 3 12:00-17:59
1097 2020-11-05 17:00:00 1 12:00-17:59
1098 2020-11-05 18:00:00 2 18:00-23:59
1099 2020-11-05 19:00:00 2 18:00-23:59
按 'Time Bin' 分组
# Collect, for each time bin, the list of 'x' observations and their mean.
# 'Time Bin' is deliberately left as the index (no as_index=False): pandas
# versions that honor as_index=False for a list of aggregations return
# 'Time Bin' as an extra column, and the two-name rename below would then
# fail with a length mismatch.
# observed=False: 'Time Bin' is categorical; keep every label in the result.
dfg = df.groupby('Time Bin', observed=False)['x'].agg([list, 'mean'])
# change the column names, if desired
dfg.columns = ['X Observations', 'X mean']
# display(dfg)
X Observations X mean
Time Bin
00:00-05:59 [2, 4, 1, 5, 2, 2, ...] 4.416667
06:00-11:59 [9, 8, 4, 0, 3, 3, ...] 4.760870
12:00-17:59 [7, 7, 7, 0, 8, 4, ...] 4.384058
18:00-23:59 [3, 2, 6, 2, 6, 8, ...] 4.459559