0

我有这段代码可以通过从“任何”ESPN 名册页面粘贴 URL 来抓取球员信息(姓名、位置、号码)。我说“任何”是因为任何页面中至少有一名球员没有号码/球衣值会出错。有没有办法解决这样的错误。

作为每个示例,费城老鹰队页面转换正确(https://www.espn.com/nfl/team/roster/_/name/phi)但底特律雄狮队名册没有(https://www.espn .com/nfl/team/roster/_/name/det

# -*- coding: utf-8 -*-
import os, json, re
import requests

team = ''
def SavePlayerData(DATA):
    global team
    for s in ['\\','/',':','*','?','"','<','>','|']:
        team = team.replace(s,'')
    outfilename = '%s.txt'%(team)
    with open(outfilename, 'w') as out_file:
        for line in DATA:
            out_file.write(line)

def GetTeamData(link):
    global opener, headers, team, short

    response = opener.get(link,headers=headers).text.encode('utf-8')

    content = re.search("window\['__espnfitt__'\]\=(.+?)\;</script>",response).group(1)

    jsonobj = json.loads(content)
    roster = jsonobj['page']['content']['roster']

    team = roster['team']['displayName']
    coach = roster['coach']['description']

    TEAM = []
    for group in roster['groups']:
        for player in group['athletes']:
            n=player['name']
            p=player['position']
            j=player['jersey']
            DATA = '%s%s\t%s %s %s (%s)\t[%s]\n'%(short,j,team,p,n,j,n)
            TEAM.append(DATA)

    DATA = '%shc\t%s %s %s\t[%s]\n'%(short,team,'head coach',coach,coach)
    TEAM.append(DATA)
    SavePlayerData(TEAM)
opener = requests.Session()
headers = {'host': 'www.espn.com',
           'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}

if __name__=="__main__":

    teamURL = raw_input(' >> Enter the Team Roster URL :: ').strip()

    short = raw_input(' >> Enter the Letter for this Team :: ').strip().lower()
    if not short:
        short='d'

    try:
        if not teamURL:
            raise Exception

        if not '/roster/' in teamURL:
            teamURL = teamURL.replace('/team/_/','/team/roster/_/')

        print ('\n >> Collecting Data from <%s>\n'%(teamURL))
        GetTeamData(teamURL)
        print (' >> Link Scraped & Data Saved to File')

    except Exception as e:
        print ('\n >> Failed to Get Required Data, Re-Check your Input URL.')
4

1 回答 1

0

您可以使用 try/except,或者只是放入一个条件语句来检查球衣是否在数据中:

import os, json, re
import requests

team = ''
def SavePlayerData(DATA):
    global team
    for s in ['\\','/',':','*','?','"','<','>','|']:
        team = team.replace(s,'')
    outfilename = '%s.txt'%(team)
    with open(outfilename, 'w') as out_file:
        for line in DATA:
            out_file.write(line)

def GetTeamData(link):
    global opener, headers, team, short

    response = opener.get(link,headers=headers).text

    content = re.search("window\['__espnfitt__'\]\=(.+?)\;</script>",response).group(1)

    jsonobj = json.loads(content)
    roster = jsonobj['page']['content']['roster']

    team = roster['team']['displayName']
    coach = roster['coach']['description']

    TEAM = []
    for group in roster['groups']:
        for player in group['athletes']:
            n=player['name']
            p=player['position']
            if 'jersey' in player:
                j=player['jersey']
            else:
                j = ''
            DATA = '%s%s\t%s %s %s (%s)\t[%s]\n'%(short,j,team,p,n,j,n)
            TEAM.append(DATA)

    DATA = '%shc\t%s %s %s\t[%s]\n'%(short,team,'head coach',coach,coach)
    TEAM.append(DATA)
    SavePlayerData(TEAM)
opener = requests.Session()
headers = {'host': 'www.espn.com',
           'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}

if __name__=="__main__":

    teamURL = input(' >> Enter the Team Roster URL :: ').strip()

    short = input(' >> Enter the Letter for this Team :: ').strip().lower()
    if not short:
        short='d'

    try:
        if not teamURL:
            raise Exception

        if not '/roster/' in teamURL:
            teamURL = teamURL.replace('/team/_/','/team/roster/_/')

        print ('\n >> Collecting Data from <%s>\n'%(teamURL))
        GetTeamData(teamURL)
        print (' >> Link Scraped & Data Saved to File')

    except Exception as e:
        print ('\n >> Failed to Get Required Data, Re-Check your Input URL.')
于 2021-11-01T08:47:31.703 回答