我正在尝试使用 Puppeteer API 将网站页面导出为 PDF 文件,但 PerimeterX 的机器人检测把它识别为机器人,并阻止了对站点的访问。是否有简单的办法让 Puppeteer 模仿真实浏览器的请求头,从而绕过这种机器人防护?
// Launch settings: run Chromium without a visible window.
var launchOptions = new LaunchOptions { Headless = true };

// Fetch the Chromium build that this PuppeteerSharp version targets.
Console.WriteLine("Downloading chromium");
var fetcher = new BrowserFetcher();
await fetcher.DownloadAsync(BrowserFetcher.DefaultRevision);

Console.WriteLine("Navigating");
using (var browser = await Puppeteer.LaunchAsync(launchOptions))
{
    using (var page = await browser.NewPageAsync())
    {
        await page.GoToAsync("https://www.matchesfashion.com");

        // Write the rendered page to "image.pdf" in the current working directory.
        Console.WriteLine("Generating PDF");
        var pdfPath = Path.Combine(Directory.GetCurrentDirectory(), "image.pdf");
        await page.PdfAsync(pdfPath);
        Console.WriteLine("Export completed");

        // Unless "auto-exit" was passed on the command line, keep the process
        // (and the still-open browser) alive until the user presses Enter.
        var waitForUser = args.All(arg => arg != "auto-exit");
        if (waitForUser)
        {
            Console.ReadLine();
        }
    }
}