0

背景:我正在尝试用 Twitch API 返回的数据创建一个数据框。该 API 每次调用最多只返回 100 条记录,因此每次响应都会附带一个新的分页游标(cursor),用于请求下一页。我使用下面的代码来自动提取这些数据,而不必手动修改 GET 请求中的 after=(分页值)。我想让 “Pagination” 变量在循环中动态更新,但它只有在循环结束后才会更新,因此起不到作用。请看下面的代码,看看我可以改动哪里来实现这个目标。非常感谢任何帮助!

# Collect the top Twitch games, 100 per request, following the API's
# pagination cursor from one response to the next.
TwitchTopGamesDataFrame = []  # One DataFrame per fetched page, in rank order
BaseURL = 'https://api.twitch.tv/helix/games/top?first=100'
# NOTE(review): credentials are hard-coded in source; they should be rotated
# and loaded from configuration or the environment instead.
Headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}
Indent = 2
Pagination = ''  # '&after=<cursor>' suffix for the next request; empty for page 1
iterations = 1  # Data records returned are equivalent to iterations x100
RankOffset = 0  # Rows collected so far, so Rank keeps counting across pages

# Show every row when a frame is displayed; set once, it is a global option.
pd.set_option('display.max_rows', None)

# Fetch up to three pages (300 records). The request is issued INSIDE the loop
# so that each pass actually uses the cursor taken from the previous response.
while iterations <= 3:
    FullURL = BaseURL + Pagination
    Response = requests.get(FullURL, headers=Headers)
    ResponseJSONData = Response.json()

    # Keep only the id/name columns and give them descriptive names.
    TopGamesDF = pd.DataFrame(ResponseJSONData['data'], columns=['id', 'name'])
    TopGamesDF = TopGamesDF.rename(columns={'id': 'GameID', 'name': 'GameName'})
    TopGamesDF['Rank'] = range(RankOffset + 1, RankOffset + len(TopGamesDF) + 1)
    RankOffset += len(TopGamesDF)
    TwitchTopGamesDataFrame.append(TopGamesDF)

    # Read the cursor straight from the JSON; an absent or empty cursor means
    # the last page was reached, so there is nothing further to request.
    Cursor = (ResponseJSONData.get('pagination') or {}).get('cursor')
    if not Cursor:
        break
    Pagination = '&after=' + Cursor
    iterations += 1
TwitchTopGamesDataFrame
4

1 回答 1

0

弄清楚了:

def top_games(page_count, *, session=None):
    """Fetch the top Twitch games and return them as a ranked DataFrame.

    The endpoint returns at most 100 records per call, so the function
    requests up to ``page_count`` pages, passing the cursor from each
    response as the ``after`` parameter of the next request.

    Args:
        page_count: Maximum number of pages (100 records each) to fetch.
            Zero or a negative value fetches nothing.
        session: Optional object exposing a ``requests``-compatible
            ``get(url, headers=..., params=..., timeout=...)`` method, e.g. a
            ``requests.Session``. Defaults to the ``requests`` module.

    Returns:
        A DataFrame with columns ``GameID``, ``GameName`` and ``Rank``, where
        ``Rank`` counts from 1 across all fetched pages. The frame is empty
        when no page was fetched.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an HTTP error
        status (``requests.HTTPError`` with the default client).
    """
    from time import gmtime, strftime

    print("Time of Execution:", strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    # Global pandas display option, kept from the original so callers that
    # print the result still see every row; set once rather than per page.
    pd.set_option('display.max_rows', None)

    url = 'https://api.twitch.tv/helix/games/top'
    # NOTE(review): credentials are hard-coded in source; they should be
    # rotated and loaded from configuration or the environment instead.
    headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}

    games = []
    if page_count > 0:
        if session is None:
            # Imported lazily so a caller-supplied session works without it.
            import requests
            session = requests

        params = {'first': 100}
        for _ in range(page_count):
            response = session.get(url, headers=headers, params=params, timeout=30)
            # Fail loudly on an error status instead of a later KeyError.
            response.raise_for_status()
            payload = response.json()
            games.extend(payload['data'])

            # An absent or empty cursor marks the last page: stop early
            # rather than re-requesting with a meaningless cursor.
            cursor = (payload.get('pagination') or {}).get('cursor')
            if not cursor:
                break
            params['after'] = cursor

    # Passing columns= keeps only id/name and still yields a well-formed
    # (empty) frame when nothing was fetched.
    frame = pd.DataFrame(games, columns=['id', 'name'])
    frame = frame.rename(columns={'id': 'GameID', 'name': 'GameName'})
    frame['Rank'] = range(1, len(frame) + 1)
    return frame
于 2020-10-29T03:03:12.700 回答