我遇到了一个导航问题。我从 HTML 表格中获取行列表,然后遍历这些行并从中抓取信息。不过每一行上还有一个链接,我单击该链接可以跳转到与当前行相关的更多信息页面并进行抓取,然后再导航回包含原始表格的页面。这对第一行有效,但对后续的行会引发异常。
在第一次单击行内的链接之后,我检查了我的行集合,发现其中的元素都不再具有单击链接之前的正确值。我猜想在导航到另一个 URL 时发生了某些我不了解的事情。
我的代码如下。我如何使它工作,以便我可以迭代父表,单击每行中的链接,导航到子表,但仍继续迭代父表中的行?
/// <summary>
/// Collects the rows of the "Document List" table once, then for each row builds a
/// Document from its first three cells, clicks the row's link cell and navigates back.
/// </summary>
/// <returns>
/// The <c>documents</c> list. NOTE(review): nothing in this method ever adds to that list,
/// so as written it is returned empty.
/// </returns>
private List<Document> getResults()
{
var documents = new List<Document>();
//Results
// First <table> on the page whose visible text contains "Document List".
// FirstOrDefault() yields null when no table matches, in which case the
// docsTable.FindElements(...) call below throws a NullReferenceException.
IWebElement docsTable = this.webDriver.FindElements(By.TagName("table"))
.Where(table => table.Text.Contains("Document List"))
.FirstOrDefault();
// Pattern: three digits followed by at least one whitespace character at the start of the text.
var validDocRowRegex = new Regex(@"^(\d{3}\s+)");
// Keep only rows that have at least one <td> and whose first <td> text does NOT match the pattern.
var docRows = docsTable.FindElements(By.TagName("tr"))
.Where(row =>
//It throws an exception with .FindElement() when there isn't one.
row.FindElements(By.TagName("td")).FirstOrDefault() != null &&
//Yeah, I don't get this one either. I negate the match and so it works??
// NOTE(review): presumably the first cell's text carries no trailing whitespace, so the
// pattern never matches a real data row and only the negated test lets rows through — verify.
!validDocRowRegex.IsMatch(
row.FindElement(By.TagName("td")).Text))
.ToList();
// The row elements are captured once, before any navigation takes place.
// NOTE(review): after the Click() and Navigate().Back() below, these IWebElement references
// presumably belong to the previous page load and go stale, which would explain the
// exception reported for every row after the first — confirm against the actual
// exception type (likely StaleElementReferenceException).
foreach (var docRow in docRows)
{
//Todo: find out why this is crashing on some documents.
var cells = docRow.FindElements(By.TagName("td"));
// Reads the first three cells as integers; Convert.ToInt32 throws on non-numeric text,
// and the indexer throws when the row has fewer than three cells.
var document = new Document
{
DocID = Convert.ToInt32(cells.First().Text),
PNum = Convert.ToInt32(cells[1].Text),
AuthNum = Convert.ToInt32(cells[2].Text)
};
//Go to history for the current document.
// Clicks the first cell that contains an <a> element (the cell, not the anchor itself).
// FirstOrDefault() returns null when no cell has a link, so Click() would then throw
// a NullReferenceException.
cells.Where(cell =>
cell.FindElements(By.TagName("a")).FirstOrDefault() != null)
.FirstOrDefault().Click();
//Todo: scrape child table.
this.webDriver.Navigate().Back();
}
return documents;
}
更新:(回应 Jim Evans 的回答)
这样看起来可以正常工作。
/// <summary>
/// Walks the "Document List" table one row at a time, building a Document from each
/// row's first three cells and visiting that row's history page before returning to the table.
/// </summary>
/// <remarks>
/// Every pass asks getDocumentRow(rowIndex) for the row again instead of holding on to a
/// previously fetched collection, so no element reference is reused across the
/// click / back navigation.
/// </remarks>
/// <returns>One Document per row returned by getDocumentRow, in row order.</returns>
private List<Document> getResults()
{
    var documents = new List<Document>();
    IWebElement docRow = null;
    int rowIndex = 0;

    // getDocumentRow returns null once rowIndex runs past the last row, which ends the loop.
    while ((docRow = this.getDocumentRow(rowIndex)) != null)
    {
        var cells = docRow.FindElements(By.TagName("td"));
        var document = new Document
        {
            DocID = Convert.ToInt32(cells.First().Text),
            PNum = Convert.ToInt32(cells[1].Text),
            AuthNum = Convert.ToInt32(cells[2].Text)
        };

        //Go to history for the current document.
        // The first cell containing an <a> is clicked (the cell, as before, not the anchor).
        var historyCell = cells.FirstOrDefault(
            cell => cell.FindElements(By.TagName("a")).Any());

        // Only navigate back when a click actually happened; a row without a link
        // would otherwise throw on Click() or send the browser back past the table page.
        if (historyCell != null)
        {
            historyCell.Click();
            //Todo: scrape child table.
            this.webDriver.Navigate().Back();
        }

        // Was documents.Add(well): 'well' is not defined anywhere in this method.
        documents.Add(document);
        rowIndex++;
    }

    return documents;
}
/// <summary>
/// Looks up the "Document List" table on the current page and returns the row at
/// position <paramref name="rowIndex"/> among the rows that pass the data-row filter.
/// </summary>
/// <remarks>
/// A missing table or an out-of-range index is reported as null without relying on
/// exceptions. Other failures raised while querying the page now propagate to the caller
/// instead of being swallowed and mistaken for "no more rows".
/// </remarks>
/// <param name="rowIndex">Zero-based index into the filtered rows.</param>
/// <returns>The row element, or null when the table or the requested row does not exist.</returns>
private IWebElement getDocumentRow(int rowIndex)
{
    // First <table> whose visible text contains "Document List"; null when none matches.
    IWebElement docsTable = this.webDriver.FindElements(By.TagName("table"))
        .FirstOrDefault(table => table.Text.Contains("Document List"));
    if (docsTable == null)
    {
        return null;
    }

    var validDocRowRegex = new Regex(@"^(\d{3}\s+)");

    return docsTable.FindElements(By.TagName("tr"))
        .Where(row =>
        {
            // FindElements returns an empty collection (rather than throwing) for rows without a <td>.
            var firstCell = row.FindElements(By.TagName("td")).FirstOrDefault();

            // The match is deliberately negated, exactly as in the original filter.
            // NOTE(review): presumably the first cell's text has no trailing whitespace, so the
            // pattern never matches a real data row — verify before "fixing" the negation.
            return firstCell != null && !validDocRowRegex.IsMatch(firstCell.Text);
        })
        // Yields null for a negative or past-the-end index, which callers use as the stop signal.
        .ElementAtOrDefault(rowIndex);
}