'有人可以分解这段代码吗?如何在没有循环的情况下设置 groupby 和 set_index'
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.model_selection import train_test_split
def MAPE(y_orig, y_pred):
    """Return the mean absolute percentage error between two series.

    Args:
        y_orig: Actual values (array-like of numbers).
        y_pred: Predicted values, same length as ``y_orig``.

    Returns:
        The mean of ``|y_orig - y_pred| / |y_orig|`` expressed in percent,
        as a float.

    Note:
        Zeros in ``y_orig`` lead to a division by zero, so the result is
        ``inf`` or ``nan`` in that case (NumPy emits a RuntimeWarning).
    """
    # Coerce to float arrays so lists and pandas objects are compared
    # position by position rather than failing or aligning on an index.
    actual = np.asarray(y_orig, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    # Take the absolute value of the whole ratio: with a signed denominator,
    # negative actual values would contribute negative "absolute" errors.
    return float(np.mean(np.abs((actual - predicted) / actual)) * 100.0)
Can I use this model without wrapping it in a function (**def**)? And instead of computing the MAPE inside the function, can I compute it separately after running the model?
def XYZ_analysis(df, *, train_size=18, m=4):
    """Rank customers by forecast accuracy and label them X, Y or Z.

    For every ``CustomerID`` in ``df`` the ``Quantity`` values, indexed by
    ``Timeperiod``, are split into a training part and a hold-out part. A
    seasonal ARIMA model is fitted on the training part with
    ``pm.auto_arima`` and the MAPE of its forecast over the hold-out part is
    recorded. Customers are then sorted by ascending MAPE and labelled:
    the first 20% of rows get ``'X'``, the next 30% ``'Y'`` and the
    remaining 50% ``'Z'``.

    Args:
        df: DataFrame with the columns ``CustomerID``, ``Timeperiod`` and
            ``Quantity``.
        train_size: Forwarded to ``train_test_split`` as ``train_size``
            (default 18, the value previously hard-coded).
        m: Seasonal period forwarded to ``pm.auto_arima`` (default 4, the
            value previously hard-coded).

    Returns:
        DataFrame with the columns ``CustomerID``, ``MAPE``, ``Rank`` and
        ``Forecastability``, sorted by ascending ``MAPE``.
    """
    records = []
    # Group by the bare column name rather than a one-element list: with a
    # list, recent pandas versions yield 1-tuples as group keys, which would
    # end up in the CustomerID column.
    for customer_id, customer_rows in df.groupby('CustomerID'):
        series = customer_rows.loc[:, ['Timeperiod', 'Quantity']].set_index('Timeperiod')

        # A simple seasonal ARIMA model is used to illustrate the idea; in
        # practice the model should be chosen to best fit the data.
        train, test = train_test_split(series, train_size=train_size)
        model = pm.auto_arima(train, seasonal=True, m=m)
        forecasts = model.predict(test.shape[0])  # one step per hold-out row

        # `test` has a single column, so flattening gives the actual values.
        mape = MAPE(test.values.ravel(), forecasts)
        records.append([customer_id, mape])

    XYZ_group = pd.DataFrame(records, columns=['CustomerID', 'MAPE'])
    XYZ_group = XYZ_group.sort_values(by=['MAPE'], ascending=True)

    # Rank by forecastability: the lowest MAPE gets rank 1.
    XYZ_group['Rank'] = XYZ_group['MAPE'].rank(ascending=True)

    # Split the sorted rows 20-30-50 into classes X, Y and Z. The cut points
    # use the row count, so an empty frame or tied/NaN ranks cannot break the
    # integer conversion. The labels are assigned as one whole column instead
    # of through chained indexing, which does not reliably write back to the
    # frame.
    n_rows = len(XYZ_group)
    x_end = int(0.2 * n_rows)
    y_end = int(0.5 * n_rows)
    XYZ_group['Forecastability'] = (
        ['X'] * x_end + ['Y'] * (y_end - x_end) + ['Z'] * (n_rows - y_end)
    )
    return XYZ_group
'谢谢'