1

我按年份抓取电影信息。用 print 语句输出时,可以打印出全部 100 部电影;但改用 rich.table 打印时,却只得到第一部电影。

import requests
from bs4 import BeautifulSoup
from rich.table import Table
from rich.console import Console

# Script from the question: scrape a Rotten Tomatoes "top movies" page for a
# user-supplied year and show rank / title / rating in a rich Table.
# As written it prints a table containing only the first movie.
table = Table()

# Base URL; the year typed by the user is appended as the query value.
url = 'https://www.rottentomatoes.com/top/bestofrt/?year='

year = input('Top 100 Movies for Which Year? ')
response = requests.get(url + year)
html = response.text

soup = BeautifulSoup(html, 'lxml')
containers = soup.find_all('table', class_='table')

for container in containers:
    # [1:] skips the first <tr> (presumably the header row -- confirm against the page).
    for row in container.find_all('tr')[1:]:
        movie_rank = row.find('td', class_='bold')
        movie_rank = movie_rank.text

        movie_name = row.find('a', class_='unstyled articleLink')
        movie_name = movie_name.text.strip()
        # NOTE(review): str.strip() takes a *set of characters*, not a suffix,
        # so this removes any leading/trailing '(', ')' and digits of `year`.
        movie_name = movie_name.strip('(' + year + ')')

        movie_rating = row.find('span', class_='tMeterScore')
        movie_rating = movie_rating.text

        # print(f'{movie_rank} {movie_name.strip()} - rating:{movie_rating}')
        # NOTE(review): the three columns are added again on every row iteration.
        table.add_column('Rank')
        table.add_column('Movie')
        table.add_column('Rating')
        # Asker's marker: the problem described in the question is here.
        table.add_row(movie_rank, movie_name, movie_rating)
       
        # The table is printed inside the row loop, and the `break` below leaves
        # the inner loop after the first row, so only one movie is ever shown.
        console = Console()
        console.print(table)
        break

4

1 回答 1

1

您在循环的第一次迭代之后就用 break 终止了循环;应该在整个表格构建完成之后只打印一次。此外,列只需要添加一次(而不是在每次迭代中都添加)。例如:

import requests
from bs4 import BeautifulSoup
from rich.table import Table
from rich.console import Console

# Base URL; the year typed by the user is appended as the query value.
url = 'https://www.rottentomatoes.com/top/bestofrt/?year='


def clean_movie_title(raw_title, year):
    """Return *raw_title* without surrounding whitespace or a trailing '(year)'.

    ``str.strip('(' + year + ')')`` must not be used for this: ``strip``
    treats its argument as a set of characters, so a title such as
    ``'1917 (2019)'`` would lose its leading digits as well.

    Args:
        raw_title: Title text as scraped from the page.
        year: Year string entered by the user, e.g. ``'2019'``.

    Returns:
        The title with the exact ``'(<year>)'`` suffix removed when present;
        otherwise the whitespace-stripped title unchanged.
    """
    title = raw_title.strip()
    suffix = '(' + year + ')'
    if title.endswith(suffix):
        # Drop the suffix and the space that separated it from the title.
        title = title[:-len(suffix)].rstrip()
    return title


def main():
    """Prompt for a year, scrape that year's top movies and print one table.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Columns are declared once, before any row is added.
    table = Table()
    table.add_column('Rank')
    table.add_column('Movie')
    table.add_column('Rating')

    year = input('Top 100 Movies for Which Year? ').strip()
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url + year, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')

    for container in soup.find_all('table', class_='table'):
        # [1:] skips the first <tr> of each table.
        for row in container.find_all('tr')[1:]:
            rank_cell = row.find('td', class_='bold')
            title_link = row.find('a', class_='unstyled articleLink')
            rating_span = row.find('span', class_='tMeterScore')
            # find() returns None when a cell is missing; skip such rows
            # instead of crashing with AttributeError on `.text`.
            if rank_cell is None or title_link is None or rating_span is None:
                continue

            table.add_row(
                rank_cell.text,
                clean_movie_title(title_link.text, year),
                rating_span.text,
            )

    # Print once, after the whole table has been built.
    Console().print(table)


if __name__ == '__main__':
    main()
于 2021-04-27T00:54:24.423 回答