很高兴再次见到你。
您只需对各队阵容数据框(DataFrame)中的 red_card 列求和即可:总和大于 0 就说明该队至少有一张红牌:
# Mark a side with '*' when the red cards recorded in its roster sum to more
# than zero; otherwise the marker is an empty string.
# The column is cast to int before summing (the cast suggests the raw values
# are not already numeric -- confirm against the scraped data).
a_red_card = '*' if away_rosters_df['red_card'].astype(int).sum() > 0 else ''
h_red_card = '*' if home_rosters_df['red_card'].astype(int).sum() > 0 else ''
然后把该标记拼接到您想要的文本(这里是球队名称)后面,例如:
# Display name for each side code ('a' / 'h'): the team name from the match
# info followed by that side's red-card marker ('*' or '').
teams_dict = {
    'a': matchObj['team_a'] + a_red_card,
    'h': matchObj['team_h'] + h_red_card,
}
完整代码:
import requests
import json
import re
from pandas.io.json import json_normalize
import pandas as pd
def _extract_json(page_html, var_name):
    """Return the object the page assigns as ``var_name = JSON.parse('...')``.

    Args:
        page_html: Full text of the downloaded page.
        var_name: Name of the JavaScript variable whose payload is wanted.

    Returns:
        The payload decoded with ``json.loads``.

    Raises:
        ValueError: If no matching assignment is found in ``page_html``.
    """
    # Raw string: ``\s`` and ``\(`` must reach the regex engine untouched
    # instead of being treated as (invalid) string escape sequences.
    pattern = re.escape(var_name) + r"\s+=\s+JSON\.parse\('([^']+)"
    found = re.search(pattern, page_html)
    if found is None:
        raise ValueError('%s not found in the page' % var_name)
    # The payload is written with backslash escapes; turn them back into the
    # characters they stand for before handing the text to the JSON parser.
    decoded_string = found.group(1).encode('utf-8').decode('unicode_escape')
    return json.loads(decoded_string)


# Download the match page; fail early on a hung connection or an HTTP error
# rather than on a confusing parse failure further down.
response = requests.get('https://understat.com/match/9458', timeout=30)
response.raise_for_status()

shotsObj = _extract_json(response.text, 'shotsData')
matchObj = _extract_json(response.text, 'match_info')
rostersObj = _extract_json(response.text, 'rostersData')
# Shots data into DataFrames: one frame per side, then both stacked.
# pd.json_normalize is the supported entry point (the pandas.io.json path was
# deprecated in pandas 1.0), and pd.concat replaces DataFrame.append, which
# was removed in pandas 2.0. As with append, the original row labels are kept.
away_shots_df = pd.json_normalize(shotsObj['a'])
home_shots_df = pd.json_normalize(shotsObj['h'])
shots_df = pd.concat([away_shots_df, home_shots_df])
# Rosters data into DataFrames.
# rostersObj['a'] and rostersObj['h'] are mappings whose values are one
# record each; the keys are not used. Building each frame from the list of
# records in one call avoids growing it row by row with DataFrame.append
# (quadratic, and removed in pandas 2.0).
# NOTE: rows are now labelled 0..n-1 within each side instead of every row
# carrying the label 0; nothing below relies on the row labels.
away_rosters_df = pd.DataFrame(list(rostersObj['a'].values()))
home_rosters_df = pd.DataFrame(list(rostersObj['h'].values()))

# Both sides stacked, away first.
rosters_df = pd.concat([away_rosters_df, home_rosters_df])
# Mark a side with '*' when the red cards recorded in its roster sum to more
# than zero; otherwise the marker is an empty string.
a_red_card = '*' if away_rosters_df['red_card'].astype(int).sum() > 0 else ''
h_red_card = '*' if home_rosters_df['red_card'].astype(int).sum() > 0 else ''

# Display name for each side code ('a' / 'h'): team name plus its marker.
teams_dict = {
    'a': matchObj['team_a'] + a_red_card,
    'h': matchObj['team_h'] + h_red_card,
}

# Title uses the plain team names (no red-card marker), home side first.
match_title = matchObj['team_h'] + ' vs. ' + matchObj['team_a']
#########################################################################
# Timing chart: running (cumulative) sum of xG per team, minute by minute,
# built from the shots data
#########################################################################
import numpy as np

# Cast 'minute' and 'xG' to numeric types so they sort and add up as numbers.
shots_df['minute'] = shots_df['minute'].astype(int)
shots_df['xG'] = shots_df['xG'].astype(float)

# Keep only the columns the chart needs, ordered by minute, and replace the
# side code in 'h_a' with the display name from teams_dict.
timing_chart_df = shots_df[['h_a', 'minute', 'xG']].sort_values('minute')
timing_chart_df['h_a'] = timing_chart_df['h_a'].map(teams_dict)

# Aggregate xG within the same team and minute.
timing_chart_df = timing_chart_df.groupby(['h_a', 'minute'], as_index=False)['xG'].sum()

# Full team x minute grid, from minute 0 up to the last minute with a shot.
# The team axis comes from teams_dict rather than from the shots themselves,
# so a side without a single shot still gets a (flat, all-zero) row instead
# of being absent. Sorting keeps the order the grouped data already has.
max_value = timing_chart_df['minute'].max()
min_idx = np.arange(max_value + 1)
team_names = sorted(set(teams_dict.values()))
m_idx = pd.MultiIndex.from_product([team_names, min_idx], names=['h_a', 'minute'])

# Minutes without a shot are filled with xG = 0, so the running sum simply
# carries the previous value forward there.
timing_chart_df = timing_chart_df.set_index(['h_a', 'minute']).reindex(m_idx, fill_value=0).reset_index()

# Calculate the running sum separately for each team.
timing_chart_df['running_sum_xG'] = timing_chart_df.groupby('h_a')['xG'].cumsum()

# Wide layout: one row per team, one column per minute; the team column is
# renamed to the match title.
timing_chart_T_df = timing_chart_df.pivot(index='h_a', columns='minute', values='running_sum_xG')
timing_chart_T_df = timing_chart_T_df.reset_index().rename(columns={timing_chart_T_df.index.name: match_title})
from datetime import datetime

# Match metadata; team names carry the red-card marker so they match the
# names stored in timing_chart_df['h_a'].
home_team = matchObj['team_h'] + h_red_card
away_team = matchObj['team_a'] + a_red_card
league = matchObj['league']
season = matchObj['season']

# Re-format the kick-off timestamp, e.g. 'Saturday, February 23, 2019'.
date = matchObj['date']
datetime_object = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
date = datetime_object.strftime('%A, %B %d, %Y')

# One-row frames (minute -> running xG) for each side.
home_xg_sum = timing_chart_df[timing_chart_df['h_a'] == home_team].pivot(index='h_a', columns='minute', values='running_sum_xG')
away_xg_sum = timing_chart_df[timing_chart_df['h_a'] == away_team].pivot(index='h_a', columns='minute', values='running_sum_xG')

# Single result row: metadata, then the home minutes, then the away minutes.
# NOTE: the minute labels therefore appear twice in the header, first for the
# home side and then for the away side.
info_cols = ['League', 'Season', 'Date', 'Home team', 'Away team']
data = [league, season, date, home_team, away_team] + home_xg_sum.values.tolist()[0] + away_xg_sum.values.tolist()[0]
cols = info_cols + list(home_xg_sum.columns) + list(away_xg_sum.columns)
results_df = pd.DataFrame([data], columns=cols)
输出:
# Print the result table rendered as one plain-text string.
print(results_df.to_string())
League Season Date Home team Away team 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
0 EPL 2018 Saturday, February 23, 2019 Newcastle United Huddersfield* 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.087855 0.087855 0.087855 0.087855 0.474551 0.474551 0.474551 0.474551 0.52089 0.52089 0.52089 0.588242 0.588242 0.588242 0.588242 0.588242 0.588242 0.588242 0.650563 0.650563 0.650563 0.713521 0.765269 0.765269 0.765269 0.765269 0.765269 0.765269 0.765269 0.765269 0.765269 0.780235 0.862191 0.862191 0.862191 0.972581 1.00803 1.00803 2.01324 2.01324 2.103931 2.103931 2.103931 2.103931 2.248354 2.248354 2.248354 2.278213 2.278213 2.278213 2.278213 2.278213 2.278213 2.397133 2.397133 2.397133 2.397133 2.397133 2.397133 2.484387 2.484387 2.624275 2.624275 2.755339 2.868987 2.868987 2.868987 2.868987 3.011753 3.011753 3.011753 3.011753 3.011753 3.011753 3.011753 3.011753 3.026651 3.026651 3.026651 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.110397 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.120421 0.133949