我正在使用以下代码从烂番茄网站上抓取评论:
页面链接:https://www.rottentomatoes.com/m/avatar/reviews?type=user
import requests
import re
import json
import pandas as pd
import numpy as np
# Download the user-review page and pull the movie id out of the JSON object
# that the page assigns to `movieReview`.
r = requests.get("https://www.rottentomatoes.com/m/avatar/reviews?type=user")
# Raw string: `\s` must reach the regex engine untouched; in a normal string
# literal it is an invalid escape sequence that newer Pythons warn about.
match = re.search(r'movieReview\s=\s(.*);', r.text)
if match is None:
    # Without this check a changed page layout surfaces as an opaque
    # AttributeError on `None.group`.
    raise ValueError("could not find the 'movieReview' JSON object in the page")
content = json.loads(match.group(1))
movieId = content["movieId"]
def getReviews(endCursor):
    """Fetch one page of user reviews for the module-level ``movieId``.

    ``endCursor`` is sent as the ``endCursor`` query parameter. The decoded
    JSON body of the response is returned.
    """
    url = f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user"
    query = {"direction": "next", "endCursor": endCursor, "startCursor": ""}
    response = requests.get(url, params=query)
    return response.json()
# Collect five pages of user reviews column by column, then build a DataFrame.
data = {"User_Name": [], "Rating": [], "Review": []}
# Maps each output column to the review field it is filled from.
columns = {"User_Name": "displayName", "Rating": "score", "Review": "review"}
result = {}
for i in range(0, 5):
    # The first request carries an empty cursor; every later request resumes
    # from the cursor reported by the previous page.
    if i == 0:
        cursor = ""
    else:
        cursor = result["pageInfo"]["endCursor"]
    result = getReviews(cursor)
    for column, key in columns.items():
        data[column].extend(t[key] for t in result["reviews"])
df = pd.DataFrame(data)
我想将上面的代码转换为一个单独的函数。
下面是我尝试改写成函数后的代码,但 json.loads() 报出如下错误:
“Expecting value: line 1 column 1 (char 0)”(期望值:第 1 行第 1 列(字符 0))
我搜索过解决方案,发现有人建议添加 headers 参数可以解决这个问题,但在我这里不起作用。
我无法理解是什么导致了这个错误。如果有人可以指导我,那将很有帮助。
import requests
import re
import json
import pandas as pd
import numpy as np
def getReviews(movieId, endCursor):
    """Fetch one page of user reviews for ``movieId`` from the review API.

    Args:
        movieId: Movie identifier interpolated into the request path.
        endCursor: Cursor sent as the ``endCursor`` query parameter; the
            caller passes "" for the first page.

    Returns:
        The decoded JSON body of the response.
    """
    # The path needs the "/movie/" segment before the id. With the segment
    # missing the reply could not be decoded as JSON, and `r.json()` failed
    # with "Expecting value: line 1 column 1 (char 0)".
    url = f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user"
    r = requests.get(
        url,
        params={
            "direction": "next",
            "endCursor": endCursor,
            "startCursor": "",
        },
        # Kept from the original request; this header is not what resolves
        # the decoding error, the corrected path above is.
        headers={'Content-Type': 'application/json'},
    )
    return r.json()
def ScrapeReviews(movie, max_pages=5):
    """Scrape user reviews for a movie into a pandas DataFrame.

    Args:
        movie: Movie slug placed in the review-page URL (e.g. ``'avatar'``).
        max_pages: Maximum number of review pages to request. Defaults to 5,
            the count that was previously hard-coded.

    Returns:
        A DataFrame with the columns ``User_Name``, ``Rating`` and ``Review``,
        one row per collected review.

    Raises:
        ValueError: If the review page does not contain the ``movieReview``
            JSON object the movie id is read from.
    """
    url = "https://www.rottentomatoes.com/m/" + movie + "/reviews?type=user"
    req = requests.get(url)
    # Raw string: `\s` must reach the regex engine untouched; in a normal
    # string literal it is an invalid escape sequence.
    match = re.search(r'movieReview\s=\s(.*);', req.text)
    if match is None:
        # Fail with a clear message instead of AttributeError on `None.group`.
        raise ValueError(f"could not find the 'movieReview' JSON object at {url}")
    movie_id = json.loads(match.group(1))["movieId"]

    data = {"User_Name": [], "Rating": [], "Review": []}
    cursor = ""  # an empty cursor requests the first page
    for _ in range(max_pages):
        result = getReviews(movie_id, cursor)
        reviews = result["reviews"]
        data['User_Name'].extend(t['displayName'] for t in reviews)
        data['Rating'].extend(t['score'] for t in reviews)
        data['Review'].extend(t['review'] for t in reviews)
        # NOTE(review): assumes a missing or empty endCursor means there are
        # no further pages - confirm against real API responses.
        cursor = result.get("pageInfo", {}).get("endCursor")
        if not cursor:
            break
    return pd.DataFrame(data)
# Example run: collect the user reviews for the movie slug 'avatar'.
d = ScrapeReviews('avatar')