1

我正在使用以下代码从烂番茄网站上抓取评论:

页面链接:https://www.rottentomatoes.com/m/avatar/reviews?type=user

import requests
import re
import json
import pandas as pd
import numpy as np

# Fetch the user-review page for "avatar" and decode the JSON object that the
# page text assigns to `movieReview`.
r = requests.get("https://www.rottentomatoes.com/m/avatar/reviews?type=user")
# Raw string so that "\s" reaches the regex engine as written; in a plain
# string literal it is an invalid escape sequence that newer Pythons warn about.
content = json.loads(re.search(r'movieReview\s=\s(.*);', r.text).group(1))

# getReviews() reads this module-level id when it builds the API URL.
movieId = content["movieId"]

def getReviews(endCursor):
    """Fetch one page of user reviews and return the decoded JSON response.

    ``endCursor`` is sent as the ``endCursor`` query parameter; the movie id
    is taken from the module-level ``movieId``.
    """
    url = f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user"
    query = {"direction": "next", "endCursor": endCursor, "startCursor": ""}
    response = requests.get(url, params=query)
    return response.json()

# Column name -> values collected across all fetched pages.
data = {"User_Name": [], "Rating": [], "Review": []}
result = {}

# Fetch five consecutive pages: the first request sends an empty cursor, each
# later one continues from the end cursor of the previous response.
for i in range(5):
    result = getReviews(result["pageInfo"]["endCursor"] if i != 0 else "")
    reviews = result["reviews"]
    data['User_Name'].extend(t['displayName'] for t in reviews)
    data['Rating'].extend(t['score'] for t in reviews)
    data['Review'].extend(t['review'] for t in reviews)

# Build the frame once, after every page has been collected, instead of
# rebuilding it on each iteration.
df = pd.DataFrame(data)

我想将上面的代码转换为一个单独的函数。

下面是我为实现这个函数所写的代码,但 json.loads() 报出了如下错误:
“Expecting value: line 1 column 1 (char 0)”

我搜索过解决方案,发现有人建议添加 headers 参数来解决这个问题,但在这里并不起作用。

我无法理解是什么导致了这个错误。如果有人可以指导我,那将很有帮助。

import requests
import re
import json
import pandas as pd
import numpy as np

def getReviews(movieId, endCursor):
    """Request one page of user reviews for ``movieId`` and return the parsed JSON.

    ``endCursor`` is passed through as the ``endCursor`` query parameter.
    """
    # NOTE(review): this path goes straight from "napi/" to the id, while the
    # other versions of this function in the file use "napi/movie/{movieId}".
    # The reported JSON decoding error is attributed to this difference.
    r = requests.get(f"https://www.rottentomatoes.com/napi/{movieId}/reviews/user",
    params = {
        "direction": "next",
        "endCursor": endCursor,
        "startCursor": ""
        },
    headers={'Content-Type': 'application/json'}
    )
    return r.json()

def ScrapeReviews(movie):
    """Collect five pages of user reviews for ``movie`` into a DataFrame.

    ``movie`` is the slug placed in the review page URL. The returned frame
    has the columns ``User_Name``, ``Rating`` and ``Review``.
    """
    page = requests.get("https://www.rottentomatoes.com/m/" + movie + "/reviews?type=user")
    embedded = re.search('movieReview\s=\s(.*);', page.text).group(1)
    movie_id = json.loads(embedded)["movieId"]

    columns = {"User_Name": [], "Rating": [], "Review": []}
    payload = {}

    for page_no in range(5):
        # The first request carries an empty cursor; later ones resume from
        # the end cursor reported by the previous response.
        if page_no == 0:
            cursor = ""
        else:
            cursor = payload["pageInfo"]["endCursor"]
        payload = getReviews(movie_id, cursor)

        reviews = payload["reviews"]
        columns['User_Name'].extend(item['displayName'] for item in reviews)
        columns['Rating'].extend(item['score'] for item in reviews)
        columns['Review'].extend(item['review'] for item in reviews)

    return pd.DataFrame(columns)
# Run the scraper for the "avatar" movie page and keep the returned frame.
d = ScrapeReviews('avatar')
4

1 回答 1

1

错误出在 getReviews 函数中:URL 里缺少 movie 这一段路径,正确的 URL 应该是:

"https://www.rottentomatoes.com/napi/**movie**/{movieId}/reviews/user"

import requests
import re
import json
import pandas as pd
import numpy as np


def getReviews(movieId, endCursor):
    """Fetch one page of user reviews for a movie and return the decoded JSON.

    Args:
        movieId: Id interpolated into the ``napi/movie/{movieId}`` URL.
        endCursor: Cursor sent as the ``endCursor`` query parameter; pass
            ``""`` for the first page.

    Returns:
        The response body as decoded by ``Response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    r = requests.get(
        f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user",
        params={"direction": "next", "endCursor": endCursor, "startCursor": ""},
        headers={"Content-Type": "application/json"},
        # Without a timeout a stalled connection would never give up.
        timeout=30,
    )
    # Surface HTTP errors here; otherwise an error page only shows up later
    # as an opaque JSON decoding failure.
    r.raise_for_status()
    return r.json()


def ScrapeReviews(movie, pages=5):
    """Scrape user reviews for a movie into a DataFrame.

    Args:
        movie: Slug placed in the review page URL, e.g. ``"avatar"``.
        pages: Number of review pages to request; defaults to 5, the count
            that was previously hard-coded.

    Returns:
        A DataFrame with the columns ``User_Name``, ``Rating`` and ``Review``.

    Raises:
        ValueError: If the page text contains no ``movieReview = ...;``
            assignment to read the movie id from.
    """
    url = "https://www.rottentomatoes.com/m/" + movie + "/reviews?type=user"
    req = requests.get(url)
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    match = re.search(r"movieReview\s=\s(.*);", req.text)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError(f"no movieReview data found in the page for {movie!r}")
    movie_id = json.loads(match.group(1))["movieId"]

    data = {"User_Name": [], "Rating": [], "Review": []}
    result = {}

    for i in range(pages):
        # The first request sends an empty cursor; later ones resume from the
        # end cursor of the previous response.
        result = getReviews(
            movie_id, result["pageInfo"]["endCursor"] if i != 0 else ""
        )
        reviews = result["reviews"]
        data["User_Name"].extend(t["displayName"] for t in reviews)
        data["Rating"].extend(t["score"] for t in reviews)
        data["Review"].extend(t["review"] for t in reviews)

    return pd.DataFrame(data)


# Scrape the "avatar" reviews and print the resulting DataFrame.
d = ScrapeReviews("avatar")
print(d)

输出:

        User_Name  Rating                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Review
0           Joe D     5.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                To me this is the most perfect blockbuster of all.\nLove Sam Worthington's empty cup, I find his everyman acting compelling, Saldana may be the most beautiful woman on the planet with her trademark perfect posture, and Sigourney adds class with extra to spare wherever she goes.\nThe planet Pandora remains the real star, and the revelation that we're the bad guys and the spiritual tree-huggers were right all along, I find genuinely touching every time.\nFirst class and I can't wait for more of Cameron's magic touch.
1         Jimmy W     1.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    The fact that this movie can make the most money of all time and also gain a following of hive-minded morons to defend it says more about the state of society than it does the movie itself. For a movie that's meant to make a point about abusive use of the environment, they sure seem to indulge in the use of massive amounts of expensive technology that no doubt utilized way more than its fair share of natural resources. Oh well, at least you can pretend to be vindicated by the box office numbers.
2      Goudkuil E     1.5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Apart from the visuals everything feels uninspired and thrown together using a old cliche of an outsider seeing what's wrong with what he's people have been doing falling in love then whiching sides. The acting is ok, the dialogue is kinda rough. The movie is padded with a lot of nice scenique views with no real narrative meaning.
3       Antonio D     4.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           o filme possui uma fotografia muito bela e, mesmo o filme sendo de 2009 não conseguimos encontrar defeitos em relação a montagem e fotografia, a história é satisfatória e é um reflexo do que sabemos que aconteceu no inicio da colonização

...and so on.
于 2021-07-16T16:59:11.797 回答