我有一个脚本用来监控一些 Facebook 页面。由于 Facebook API 自 2019 年 4 月 4 日起取消了页面公开内容的访问权限，我需要改用 XPath 来解析页面内容。
每个 Facebook 帖子都包含在 div[contains(@class,"userContentWrapper")] 元素中。我想逐个遍历这些帖子，以找到所需的数据。
我不知道为什么对每个帖子执行 $message = $post->findvalue('//div[@data-testid="post_message"]//p'); 时，得到的是所有帖子的全部 <p> 文本，而不只是当前帖子的文本。
# Fetch the public "posts" page of a Facebook page, save the raw HTML to
# zzz.htm for inspection, then print a few fields of every post found in it.
#
# Each post is a <div> whose class contains "userContentWrapper". All
# per-post lookups use XPath expressions that start with ".//" so that they
# are evaluated relative to the current post. An expression starting with
# "//" is absolute: it searches the whole document no matter which node the
# method is called on, which makes every post report the same (first) id
# and the concatenated <p> text of all posts.
use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Request;
use HTML::TreeBuilder::XPath;

my $page_url  = 'https://www.facebook.com/pg/FIFA/posts/';
my $base_url  = 'https://www.facebook.com';
my $dump_file = 'zzz.htm';

# Return the value of the first attribute node matched by $xpath below
# $node, optionally prefixed with $prefix. Returns the empty string when
# nothing matches, so a post lacking a field does not abort the whole run.
sub first_attr_value {
    my ($node, $xpath, $prefix) = @_;
    my ($attr) = $node->findnodes($xpath);
    return '' unless defined $attr;
    return (defined $prefix ? $prefix : '') . $attr->getValue;
}

my $ua      = LWP::UserAgent->new;
my $request = HTTP::Request->new(GET => $page_url);
$request->header('User-Agent' => 'Mozilla/5.0 Chrome/71.0.3578.98 Safari/537.36');

my $response = $ua->request($request);
warn 'GET ', $page_url, ' failed: ', $response->status_line, "\n"
    unless $response->is_success;

# Keep a copy of exactly what the server returned; useful when the page
# markup changes and the XPath expressions stop matching.
open(my $dump_fh, '>', $dump_file) or die "Cannot open $dump_file: $!";
print {$dump_fh} $response->content;
close($dump_fh) or die "Cannot close $dump_file: $!";

my $tree  = HTML::TreeBuilder::XPath->new_from_content($response->content);
my @posts = $tree->findnodes('//div[contains(@class,"userContentWrapper")]');

for my $post (@posts) {
    my $id = first_attr_value($post, './/div[@data-testid="story-subtitle"]/@id');
    print "id = $id\n\n";

    my $object_id = first_attr_value(
        $post, './/div[@data-testid="story-subtitle"]//a/@href', $base_url);
    print "object_id = $object_id\n\n";

    # findvalue() yields the string value of every matched <p>, so a
    # multi-paragraph message is returned as one string.
    my $message = $post->findvalue('.//div[@data-testid="post_message"]//p');
    print "$message\n\n";

    my $ajaxify = first_attr_value($post, './/div[@class="mtm"]//a/@ajaxify');
    print "ajaxify = $ajaxify\n\n";

    my $ploi = first_attr_value($post, './/div[@class="mtm"]//a/@data-ploi');
    print "ploi = $ploi\n\n";

    # my $plsi = first_attr_value($post, './/div[@class="mtm"]//a/@data-plsi');
    # print "plsi = $plsi\n\n";

    my $href = first_attr_value($post, './/div[@class="mtm"]//a/@href', $base_url);
    print "href = $href\n\n";

    print "---------------------------------------------------------\n\n";
}

# Release the parse tree explicitly, as HTML::TreeBuilder recommends.
$tree->delete;