我正在尝试为某个特定网站编写爬虫。在某一步中,我必须单击一个链接。该元素可以被找到,但单击总是失败。我也尝试过通过其父元素来单击,结果相同。在此之前的所有其他元素都可以毫无问题地访问,单击它们也能正常工作。有人知道我哪里做错了吗?
/// <summary>
/// Playwright-driven scraper that opens kaufland.de once per store ("Filiale") of the
/// Kaufland chain, accepts the cookie banner and tries to switch the site to that store.
/// Progress and error texts are published through <see cref="Message"/>.
/// </summary>
public class KauflandAngebotScraperPW : IScraper, IDisposable
{
    private const string URL = "https://www.kaufland.de/";
    private const int KETTENID = 1;

    // Static gate with a single slot: only one store is processed at a time,
    // across all instances of this class.
    private static readonly SemaphoreSlim _lock = new SemaphoreSlim(initialCount: 1);

    private IPlaywright _playwright;
    private string _dir;
    private ShopprContext _db = new ShopprContext();
    private ShopprContextProcedures _procedures;
    private CancellationTokenSource _ctcCrawler;
    private bool _crawlerRunning = false;
    private Subject<KauflandAngebotScraperPW> _scrapingFinished = new Subject<KauflandAngebotScraperPW>();
    private Subject<string> _message = new Subject<string>(); // The actual data stream
    private bool disposedValue;

    /// <summary>True while a crawl has been started and not yet stopped.</summary>
    public bool CrawlerRunning { get => _crawlerRunning; set => _crawlerRunning = value; }

    /// <summary>Stream backed by the internal "scraping finished" subject.</summary>
    public IObservable<object> ScrapingFinished => _scrapingFinished;

    /// <summary>Stream of status and error messages emitted while scraping.</summary>
    public IObservable<string> Message => _message;

    /// <summary>
    /// Creates the scraper, ensures the working directory below "My Documents" exists
    /// and wires the stored-procedure helper to the database context.
    /// </summary>
    public KauflandAngebotScraperPW()
    {
        _dir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "/Shoppr/Kaufland/";
        if (!Directory.Exists(_dir))
        {
            Directory.CreateDirectory(_dir);
        }

        _procedures = new ShopprContextProcedures(_db);
    }

    /// <summary>
    /// Releases the resources owned by this instance.
    /// </summary>
    /// <param name="disposing">
    /// True when called from <see cref="Dispose()"/>; managed resources are only
    /// released in that case.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposedValue)
        {
            return;
        }

        if (disposing)
        {
            // Ask a running crawl to stop before its resources go away.
            if (_ctcCrawler != null)
            {
                _ctcCrawler.Cancel();
                _ctcCrawler.Dispose();
            }

            _crawlerRunning = false;

            // The definitions of IPlaywright and ShopprContext are not visible in this
            // file, so they are disposed only if they turn out to be disposable.
            (_playwright as IDisposable)?.Dispose();
            (_db as IDisposable)?.Dispose();

            // Completing (rather than disposing) the subjects keeps a late OnNext from a
            // still-unwinding background task harmless.
            _message.OnCompleted();
            _scrapingFinished.OnCompleted();
        }

        disposedValue = true;
    }

    /// <summary>Disposes the scraper; see <see cref="Dispose(bool)"/>.</summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Starts the crawl. Blocks until the Playwright driver has been created, then
    /// processes the stores in the background. Does nothing if a crawl is already running.
    /// </summary>
    public void start()
    {
        if (_crawlerRunning)
        {
            return;
        }

        _crawlerRunning = true;
        _ctcCrawler = new CancellationTokenSource();

        try
        {
            var task = Task.Run(async () =>
            {
                ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
                {
                    builder.SetMinimumLevel(LogLevel.Debug);
                    builder.AddFilter((f, _) => f == "PlaywrightSharp.Playwright");
                });

                // Create the driver with API-level debug logging enabled.
                _playwright = await Playwright.CreateAsync(loggerFactory: loggerFactory, debug: "pw:api");
            }, _ctcCrawler.Token);
            task.Wait();
        }
        catch
        {
            // Initialization failed: do not leave the scraper stuck in the "running"
            // state, otherwise a later start() would silently do nothing.
            _crawlerRunning = false;
            throw;
        }

        // Fire and forget: the crawl reports through the Message stream.
        _ = ParseFilialenAsync();
    }

    /// <summary>Requests cancellation of a running crawl. No-op when not running.</summary>
    public void stop()
    {
        if (!_crawlerRunning)
        {
            return;
        }

        _ctcCrawler.Cancel();
        _crawlerRunning = false;
    }

    /// <summary>
    /// Walks all stores of the Kaufland chain and processes them one after another.
    /// </summary>
    private async Task ParseFilialenAsync()
    {
        // Capture the token once so the loop keeps working against the crawl it was
        // started for, even if the source field is disposed in the meantime.
        CancellationToken token = _ctcCrawler.Token;
        var filialen = _db.Filiale.Where(x => x.Kette == KETTENID);

        try
        {
            foreach (var filiale in filialen)
            {
                await _lock.WaitAsync(token);
                try
                {
                    await ParseAngeboteAsync(filiale);
                }
                finally
                {
                    // Without this release the second iteration would wait forever.
                    _lock.Release();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // stop() or Dispose() cancelled the wait; ending the loop is the intended outcome.
        }
    }

    /// <summary>
    /// Opens a browser for one store, navigates to the start page, accepts cookies and
    /// selects the store. Failures are reported via <see cref="Message"/> and a screenshot.
    /// </summary>
    /// <param name="filiale">The store to process.</param>
    private async Task ParseAngeboteAsync(Filiale filiale)
    {
        IBrowser puppet = null;
        IBrowserContext context = null;
        IPage page = null;

        _message.OnNext($"Überprüfe aktuelle Angebote für Kaufland-Markt in {filiale.Straße}, {filiale.Plz} {filiale.Ort}");
        try
        {
            if (_ctcCrawler.IsCancellationRequested)
            {
                return;
            }

            // Launched inside the try block so a failure here is reported and whatever
            // was already created still gets closed in the finally block.
            puppet = await _playwright.Webkit.LaunchAsync(headless: false);
            context = await puppet.NewContextAsync();
            page = await context.NewPageAsync();
            await page.GoToAsync(URL);
            await Task.Delay(10000);

            // Accept the cookie banner if it is shown.
            await AcceptCookieAsync(page);

            // Select the store.
            await Task.Delay(1000);
            await SelectMarketAsync(filiale, page);
            await Task.Delay(5000);
        }
        catch (Exception e)
        {
            _message.OnNext(e.Message);

            // "await page?.X()" would await null and throw when no page exists yet.
            if (page != null)
            {
                await page.ScreenshotAsync($"c:\\temp\\Kaufland_{filiale.Id}_{DateTime.Now.Ticks}.png");
            }
        }
        finally
        {
            try
            {
                if (context != null)
                {
                    await context.CloseAsync();
                }
            }
            finally
            {
                // Close the browser even if closing the context failed.
                if (puppet != null)
                {
                    await puppet.CloseAsync();
                }
            }
        }
    }

    /// <summary>
    /// Clicks the cookie consent button if it appears within five seconds.
    /// </summary>
    /// <param name="page">The page showing the start site.</param>
    private async Task AcceptCookieAsync(IPage page)
    {
        try
        {
            var cookieAcceptButton = await page.WaitForSelectorAsync("button[class='cookie-alert-extended-button']", timeout: 5000);
            await cookieAcceptButton.ClickAsync();
        }
        catch (Exception)
        {
            // Deliberately best effort: a missing banner must not abort the run.
        }
    }

    /// <summary>
    /// Opens the store flyout in the meta navigation and clicks the entry that lets the
    /// user change the store.
    /// </summary>
    /// <param name="filiale">The store that should be selected (not used yet).</param>
    /// <param name="page">The page showing the start site.</param>
    /// <exception cref="InvalidOperationException">
    /// The "change store" link is not present in the page.
    /// </exception>
    private async Task SelectMarketAsync(Filiale filiale, IPage page)
    {
        var marketSelektor = await page.QuerySelectorAsync("div[class='m-navigation-meta__item m-navigation-meta__item-store']");
        if (marketSelektor != null)
        {
            await marketSelektor.ClickAsync(50);
        }

        await Task.Delay(2000);

        IElementHandle elementHandle = await page.QuerySelectorAsync("a[class='a-link a-link--icon-arrow a-link--storeflyout-change']");
        if (elementHandle == null)
        {
            // QuerySelectorAsync yields null when nothing matches; fail with a clear
            // message instead of a NullReferenceException on the next line.
            throw new InvalidOperationException("Der Link zum Wechseln der Filiale wurde nicht gefunden.");
        }

        // Step up to the parent element of the link and wait for it to become visible.
        // NOTE(review): if the flyout did not open, this parent stays hidden and the
        // wait will not succeed - confirm the flyout is really open at this point.
        var changeFiliale = await elementHandle.WaitForSelectorAsync("xpath=..", WaitForState.Visible);
        if (changeFiliale != null)
        {
            _message.OnNext((await changeFiliale.IsEnabledAsync()).ToString());
            _message.OnNext((await changeFiliale.IsHiddenAsync()).ToString());
            _message.OnNext((await changeFiliale.IsVisibleAsync()).ToString());
            await changeFiliale.FocusAsync();
            await changeFiliale.ClickAsync(40, force: true);
        }
    }
}
这是网站上的代码
This is the output of the Playwright log:
Navigiere zu Kaufland-Seite
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.048Z pw:api navigating to "https://www.kaufland.de/", waiting until "load"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.213Z pw:api navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.180Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.193Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.195Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.203Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.206Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.716Z pw:api navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.720Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.188Z pw:api navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.412Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.415Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.418Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.422Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.425Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.952Z pw:api "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.677Z pw:api navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.693Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:47.188Z pw:api "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:51.504Z pw:api "load" event fired
Akzeptiere Cookies
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.552Z pw:api waiting for selector "button[class='cookie-alert-extended-button']" to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.590Z pw:api selector resolved to visible <button tabindex="1" type="button" class="cookie-alert-e…>Zustimmen </button>
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.646Z pw:api element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.763Z pw:api done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.766Z pw:api checking that element receives pointer events at (865.48,541)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api navigations have finished
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.923Z pw:api navigated to "https://kauflandstiftung.demdex.net/dest5.html?d_nsid=0#https%3A%2F%2Fwww.kaufland.de"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api "load" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:04.448Z pw:api "networkidle" event fired
Klicke auf Markt
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.887Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.888Z pw:api waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.929Z pw:api element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.026Z pw:api done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.029Z pw:api checking that element receives pointer events at (324.55,43)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api navigations have finished
Klicke auf 'Ändere Filiale'
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.212Z pw:api waiting for selector "xpath=.." to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.227Z pw:api selector resolved to hidden <li class="m-linklist__item">…</li>