我有一个脚本，它把外部 URL 嵌入到一个 iframe 中，并使用 PHP 抓取库（PHPScraper）读取该外部页面的 Open Graph（og）标签，再把这些标签插入到包含 iframe 的页面里。现在的问题是：外部站点是用 JavaScript 动态填充 og 标签的，而我的脚本在抓取时不会执行 JavaScript，因此拿不到正确的 og 标签。我该如何解决这个问题？
<?php
// Redirect delay setting, stored as a string.
const DELAY = '0';

// Run og_scraper() whenever the 'pre_redirect' action fires.
yourls_add_action(
	'pre_redirect',
	'og_scraper'
);
/**
 * 'pre_redirect' hook: instead of redirecting, print a page that frames the
 * target URL and carries Open Graph tags copied from the target page.
 *
 * The tags are read with the scraper library; when og:description or og:title
 * is missing, the page's meta description / <title> element is used instead.
 * Tags whose value ends up empty are not printed. The function never returns:
 * it ends the request with die() after printing the page.
 *
 * NOTE(review): the scraper works on the HTML as delivered by the server.
 * Open Graph tags that the target site fills in with client-side JavaScript
 * will not be visible here; that would need a JavaScript-capable renderer
 * (e.g. a headless browser), which this function does not use.
 *
 * @param array $args Hook arguments; element 0 is used as the target URL.
 * @return void
 */
function og_scraper( $args ) {
	$url        = $args[0];
	$parsed_url = parse_url( $url );

	require_once '/var/www/fantasticpix.com/htdocs/pixiviz/dist/artworks/includes/vendor/autoload.php';
	$web = new \spekulatius\phpscraper();
	$web->go( $url );
	$og = $web->openGraph;

	// Description: Open Graph first, then the classic <meta name="description">.
	$description = og_scraper_text( $og['og:description'] ?? '' );
	if ( $description === '' ) {
		$meta = get_meta_tags( $url );
		if ( is_array( $meta ) ) {
			$description = og_scraper_text( $meta['description'] ?? '' );
		}
	}

	// Title: Open Graph first, then the document's <title> element.
	$title = og_scraper_text( $og['og:title'] ?? '' );
	if ( $title === '' ) {
		$page = file_get_contents( $url );
		if ( $page !== false && preg_match( '/<title[^>]*>(.*?)<\/title>/ims', $page, $match ) ) {
			// The raw markup may contain entities; decode so escaping below does not double-encode.
			$title = trim( html_entity_decode( $match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8' ) );
		}
	}

	// Only site-relative image paths get the target's origin prepended.
	$image = og_scraper_absolute_url( $og['og:image'] ?? '', $parsed_url );

	// 'og:url' is the standard property; 'og:location' is kept as a fallback
	// because that is the key this function read before.
	$tags = [
		'og:title'       => $title,
		'og:type'        => og_scraper_text( $og['og:type'] ?? '' ),
		'og:url'         => og_scraper_text( $og['og:url'] ?? $og['og:location'] ?? '' ),
		'og:image'       => $image,
		'og:site_name'   => og_scraper_text( $og['og:site_name'] ?? '' ),
		'og:description' => $description,
	];
?>
<html>
<head>
<?php
	foreach ( $tags as $property => $content ) {
		if ( $content === '' ) {
			continue;
		}
		// Scraped values are third-party text: escape before placing them in an attribute.
		echo '<meta property="' . $property . '" content="' . og_scraper_attr( $content ) . '" />' . "\n";
	}
?>
</head>
<body><iframe id="yourls-frame" frameborder="0" noresize="noresize" src="<?php echo og_scraper_attr( $url ); ?>" name="yourlsFrame" style="position:fixed; top:0; left:0; bottom:0; right:0; width:100%; height:100%; border:none; margin:0; padding:0; overflow:hidden; z-index:999999;"></iframe></body>
</html>
<?php
	die();
}

/**
 * Normalise a scraped value to a trimmed string; non-scalar values become ''.
 *
 * @param mixed $value
 * @return string
 */
function og_scraper_text( $value ) {
	return is_scalar( $value ) ? trim( (string) $value ) : '';
}

/**
 * Escape a string for use inside a double-quoted HTML attribute.
 *
 * @param string $value
 * @return string
 */
function og_scraper_attr( $value ) {
	return htmlspecialchars( (string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
}

/**
 * Turn a scraped URL into an absolute one using the target page's origin.
 *
 * Absolute http(s) URLs and empty values are returned unchanged. Protocol-relative
 * URLs ("//host/path") get the page's scheme. Anything else is treated as a path
 * on the page's host (resolved against the site root, not the page's directory).
 *
 * @param mixed       $candidate  Scraped URL or path.
 * @param array|false $parsed_url Result of parse_url() for the target page.
 * @return string
 */
function og_scraper_absolute_url( $candidate, $parsed_url ) {
	$candidate = og_scraper_text( $candidate );
	if ( $candidate === '' || preg_match( '#^https?://#i', $candidate ) ) {
		return $candidate;
	}
	// Without a usable origin there is nothing to prepend.
	if ( ! is_array( $parsed_url ) || empty( $parsed_url['scheme'] ) || empty( $parsed_url['host'] ) ) {
		return $candidate;
	}
	if ( strpos( $candidate, '//' ) === 0 ) {
		return $parsed_url['scheme'] . ':' . $candidate;
	}
	$origin = $parsed_url['scheme'] . '://' . $parsed_url['host'];
	if ( isset( $parsed_url['port'] ) ) {
		$origin .= ':' . $parsed_url['port'];
	}
	return $origin . '/' . ltrim( $candidate, '/' );
}