2

我正在尝试为特定网站编写爬虫。在某些时候,我必须单击一个链接。找到该元素,但单击总是失败。我也尝试通过具有相同结果的父元素来实现它。之前的所有其他元素都可以毫无问题地访问,并且单击它们也可以正常工作。有人知道我做错了什么吗?

    public class KauflandAngebotScraperPW : IScraper, IDisposable
    {
        private const string URL = "https://www.kaufland.de/";
        private const int KETTENID = 1;

        private static SemaphoreSlim _lock = new SemaphoreSlim(initialCount: 1);
        private IPlaywright _playwright;
        private string _dir;
        private ShopprContext _db = new ShopprContext();
        private ShopprContextProcedures _procedures;
        private CancellationTokenSource _ctcCrawler;
        private bool _crawlerRunning = false;
        private Subject<KauflandAngebotScraperPW> _scrapingFinished = new Subject<KauflandAngebotScraperPW>();
        private Subject<string> _message = new Subject<string>();    // The actual data stream


        private bool disposedValue;

        public bool CrawlerRunning { get => _crawlerRunning; set => _crawlerRunning = value; }

        public IObservable<object> ScrapingFinished => _scrapingFinished;

        public IObservable<string> Message => _message;

        public KauflandAngebotScraperPW()
        {
            _dir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "/Shoppr/Kaufland/";
            if (!Directory.Exists(_dir))
                Directory.CreateDirectory(_dir);

            _procedures = new ShopprContextProcedures(_db);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (!disposedValue)
            {
                if (disposing)
                {
                    // TODO: Verwalteten Zustand (verwaltete Objekte) bereinigen
                }

                // TODO: Nicht verwaltete Ressourcen (nicht verwaltete Objekte) freigeben und Finalizer überschreiben
                // TODO: Große Felder auf NULL setzen
                disposedValue = true;
            }
        }

        // // TODO: Finalizer nur überschreiben, wenn "Dispose(bool disposing)" Code für die Freigabe nicht verwalteter Ressourcen enthält
        // ~KauflandAngebotScraperPW()
        // {
        //     // Ändern Sie diesen Code nicht. Fügen Sie Bereinigungscode in der Methode "Dispose(bool disposing)" ein.
        //     Dispose(disposing: false);
        // }

        public void Dispose()
        {
            // Ändern Sie diesen Code nicht. Fügen Sie Bereinigungscode in der Methode "Dispose(bool disposing)" ein.
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        public void start()
        {
            if (_crawlerRunning)
                return;

            _crawlerRunning = true;
            _ctcCrawler = new CancellationTokenSource();

            var task = Task.Run(async () => {
                ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
                {
                    builder.SetMinimumLevel(LogLevel.Debug);
                    builder.AddFilter((f, _) => f == "PlaywrightSharp.Playwright");
                });

                //Auswahlmethode zurücksetzen
                _playwright = await Playwright.CreateAsync(loggerFactory: loggerFactory, debug: "pw:api");
            }, _ctcCrawler.Token);
            task.Wait();
             _ = ParseFilialenAsync();
        }

        public void stop()
        {
            if (!_crawlerRunning)
                return;

            _ctcCrawler.Cancel();
            _crawlerRunning = false;
        }

        private async Task ParseFilialenAsync()
        {
            var filialen = _db.Filiale.Where(x => x.Kette == KETTENID);
            foreach(var filiale in filialen)
            {
                await _lock.WaitAsync(_ctcCrawler.Token);
                _ = ParseAngeboteAsync(filiale);
            }
        }

        private async Task ParseAngeboteAsync(Filiale filiale)
        {
            IBrowser puppet;
            IBrowserContext context;
            IPage page = null;

            puppet = await _playwright.Webkit.LaunchAsync(headless: false);
            context = await puppet.NewContextAsync();

            _message.OnNext($"Überprüfe aktuelle Angebote für Kaufland-Markt in {filiale.Straße}, {filiale.Plz} {filiale.Ort}");
           
            try
            {
                if (_ctcCrawler.IsCancellationRequested)
                {
                    return;
                }

                page = await context.NewPageAsync();

                await page.GoToAsync(URL);
                await Task.Delay(10000);
                
                // cookie Setzen wenn notwendig
                await AcceptCookieAsync(page);
                //Setze den Markt
                await Task.Delay(1000);
                await SelectMarketAsync(filiale, page);
                await Task.Delay(5000);
            }
            catch(Exception e)
            {
                _message.OnNext(e.Message);
                await page?.ScreenshotAsync($"c:\\temp\\Kaufland_{filiale.Id}_{DateTime.Now.Ticks}.png");
            }
            finally
            {
                await context.CloseAsync();
                await puppet.CloseAsync();
            }
        }

        private async Task AcceptCookieAsync(IPage page)
        {
            try
            {
                var cookieAcceptButton = await page.WaitForSelectorAsync("button[class='cookie-alert-extended-button']", timeout: 5000);
                await cookieAcceptButton.ClickAsync();
            }
            catch
            {
                // Is ok
            }
        }

        private async Task SelectMarketAsync(Filiale filiale, IPage page)
        {
            //var marketSelektor = await page.WaitForSelectorAsync("a[class='m-store-flyout__link']", timeout: 5000);
            var marketSelektor = await page.QuerySelectorAsync("div[class='m-navigation-meta__item m-navigation-meta__item-store']");
            if(marketSelektor != null)
            {
                await marketSelektor.ClickAsync(50);
            }
            await Task.Delay(2000);

            //li 
            IElementHandle elementHandle = await page.QuerySelectorAsync("a[class='a-link a-link--icon-arrow a-link--storeflyout-change']");
            var changeFiliale = await elementHandle.WaitForSelectorAsync("xpath=..", WaitForState.Visible);
            if(changeFiliale != null)
            {
                _message.OnNext((await changeFiliale.IsEnabledAsync()).ToString());
                _message.OnNext((await changeFiliale.IsHiddenAsync()).ToString());
                _message.OnNext((await changeFiliale.IsVisibleAsync()).ToString());
                await changeFiliale.FocusAsync();
                await changeFiliale.ClickAsync(40, force: true);
            }
        }
    }

这是网站上的代码

在此处输入图像描述

这就是异常给我的: 在此处输入图像描述

This is the output of the Playwright log:
Navigiere zu Kaufland-Seite
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.048Z pw:api navigating to "https://www.kaufland.de/", waiting until "load"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.213Z pw:api   navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.180Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.193Z pw:api   navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.195Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.203Z pw:api   navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.206Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.716Z pw:api   navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.720Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.188Z pw:api   navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.412Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.415Z pw:api   navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.418Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.422Z pw:api   navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.425Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.952Z pw:api   "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.677Z pw:api   navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.693Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:47.188Z pw:api   "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:51.504Z pw:api   "load" event fired
Akzeptiere Cookies
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.552Z pw:api waiting for selector "button[class='cookie-alert-extended-button']" to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.590Z pw:api   selector resolved to visible <button tabindex="1" type="button" class="cookie-alert-e…&gt;Zustimmen </button>
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api   waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.646Z pw:api     element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api   element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api   scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.763Z pw:api   done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.766Z pw:api   checking that element receives pointer events at (865.48,541)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api   element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api   performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api   click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api   waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api   navigations have finished
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.923Z pw:api   navigated to "https://kauflandstiftung.demdex.net/dest5.html?d_nsid=0#https%3A%2F%2Fwww.kaufland.de"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api   "load" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api   "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:04.448Z pw:api   "networkidle" event fired
Klicke auf Markt
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.887Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.888Z pw:api   waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.929Z pw:api     element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api   element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api   scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.026Z pw:api   done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.029Z pw:api   checking that element receives pointer events at (324.55,43)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api   element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api   performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api   click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api   waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api   navigations have finished
Klicke auf 'Ändere Filiale'
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.212Z pw:api waiting for selector "xpath=.." to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.227Z pw:api   selector resolved to hidden <li class="m-linklist__item">…&lt;/li>

应单击的元素的屏幕截图 在此处输入图像描述

4

1 回答 1

1

经过一段时间的尝试,我找到了解决方法。根本问题似乎是剧作家没有认识到元素可见性的变化之后

var marketSelektor = await page.QuerySelectorAsync("div[class='m-navigation-meta__item m-navigation-meta__item-store']");
if(marketSelektor != null)
{
    await marketSelektor.ClickAsync(50);
}

因此,以下行的执行失败并显示该元素不可见的日志

IElementHandle elementHandle = await page.QuerySelectorAsync("a[class='a-link a-link--icon-arrow a-link--storeflyout-change']");
var changeFiliale = await elementHandle.WaitForSelectorAsync("xpath=..", WaitForState.Visible);
if(changeFiliale != null)
{
    await changeFiliale.FocusAsync();
    await changeFiliale.ClickAsync(40, force: true);
}

所以我想知道如果我通过 playwright 方法执行一个 javascript-snippetWaitForFunctionAsync并插入以下块是否会一样

page.WaitForFunctionAsync("document.querySelector(\"a[class='a-link a-link--icon-arrow a-link--storeflyout-change']\").click()");
await Task.Delay(45000);

它有我想要的结果。我必须在没有 a 的情况下执行此操作await并在Task.Delay之后放置,否则即使在达到 30 秒标准超时之前很久就可以看到元素,它也会引发超时。我仍然想知道为什么。

我还观察到 WebKit、Chromium 和 Firefox 的性能差异很大,其中 WebKit 是最慢的。但这是另一回事。

于 2021-04-11T10:48:49.927 回答