利用:
#convert columns to datetimes
df['Joining Date'] = pd.to_datetime(df['Joining Date'], dayfirst=True)
df['Attrition Date'] = pd.to_datetime(df['Attrition Date'], dayfirst=True)
#add new rows by first January of minimal year per groups
df1 = df.groupby('Office')['Joining Date'].min() - pd.offsets.DateOffset(month=1, day=1)
df = df.append(df1.reset_index()).sort_values(['Office','Joining Date'])
#replace missing values in Attrition Date by maximal date with next month
#replace missing values in Joining Date by maximal date with next month
next_month = (df.groupby('Office')['Attrition Date'].transform('max') +
pd.offsets.DateOffset(months=1))
next_month1 = (df.groupby('Office')['Joining Date'].transform('max') +
pd.offsets.DateOffset(months=1))
df['Attrition Date'] = df['Attrition Date'].fillna(next_month).fillna(next_month1)
#explode start and end datetimes converted to months with years
f = lambda x: pd.date_range(x['Joining Date'],
x['Attrition Date'], freq='M').strftime('%b-%y')
df['Month & Year'] = df.apply(f, axis=1)
#count number of Employee ID with omit missing values
df = (df.explode('Month & Year')
.groupby(['Office','Month & Year'], sort=False)['Employee ID']
.count()
.reset_index(name='Active'))
print (df)
Office Month & Year Active
0 AA Jan-17 0
1 AA Feb-17 0
2 AA Mar-17 0
3 AA Apr-17 0
4 AA May-17 0
5 AA Jun-17 0
6 AA Jul-17 0
7 AA Aug-17 0
8 AA Sep-17 0
9 AA Oct-17 0
10 AA Nov-17 1
11 AA Dec-17 1
12 AA Jan-18 2
13 AA Feb-18 3
14 AA Mar-18 3
15 AA Apr-18 2
16 AB Jan-14 1
17 AB Feb-14 2
18 AB Mar-14 2
19 AB Apr-14 1