我正在尝试实现一个 facebook 刮板,以获取有关 facebook 页面提要帖子的反应的见解。我注意到实际当天和最后几天的结果(帖子)是正确的,但是过去越远,越多的提要帖子被跳过,返回结果的数量非常少。
为什么 Graph 跳过很多帖子?有时它甚至会跳过整整几个月!
这是我正在使用的代码:
import json
import datetime
import csv
import time
import urllib.request
import urllib.error
import requests
import numpy as np
import matplotlib.pyplot as plt
import json
from urllib.parse import urlencode
import pandas as pd
page_id="nytimes"
token="my_User_Token_Here" #using a user token got from [https://developers.facebook.com/tools/explorer/][1]
url="https://graph.facebook.com/v2.12/"+page_id+"/posts/?fields=id,created_time,message,shares.summary(true).limit(0),comments.summary(true).limit(0),likes.summary(true),reactions.type(LOVE).limit(0).summary(total_count).as(Love),reactions.type(WOW).limit(0).summary(total_count).as(Wow),reactions.type(HAHA).limit(0).summary(total_count).as(Haha),reactions.type(SAD).limit(0).summary(1).as(Sad),reactions.type(ANGRY).limit(0).summary(1).as(Angry)&access_token="+token+"&limit=100"
posts = []
found = False
try:
while (True):
print(url)
facebook_connection = urlopen(url)
data = facebook_connection.read().decode('utf8')
json_object = json.loads(data)
allposts=json_object["data"]
allposts = np.asarray(allposts)
created = '2018-03-01'
for i in range(0,100,1):
if (pd.to_datetime(allposts[i]['created_time']) > pd.to_datetime(created)):
#print(allposts[i]['created_time'])
posts.append(allposts[i])
else:
print(i, "%i fucking here!")
posts.append(allposts[i])
found = True
break;
if (i == 99):
#print('here is: ' + i)
url = json_object["paging"]["next"]
if (found == True):
break;
df=pd.DataFrame(posts)
except Exception as ex:
print (ex)