我有以下代码,但是,当我启动它时,我只会缝合以返回一些 URL。
while (stopFlag != true)
{
WebRequest request = WebRequest.Create(urlList[i]);
using (WebResponse response = request.GetResponse())
{
using (StreamReader reader = new StreamReader
(response.GetResponseStream(), Encoding.UTF8))
{
string sitecontent = reader.ReadToEnd();
//add links to the list
// process the content
//clear the text box ready for the HTML code
//Regex urlRx = new Regex(@"((https?|ftp|file)\://|www.)[A-Za-z0-9\.\-]+(/[A-Za-z0-9\?\&\=;\+!'\(\)\*\-\._~%]*)*", RegexOptions.IgnoreCase);
Regex urlRx = new Regex(@"(?<url>(http:[/][/]|www.)([a-z]|[A-Z]|[0-9]|[/.]|[~])*)", RegexOptions.IgnoreCase);
MatchCollection matches = urlRx.Matches(sitecontent);
foreach (Match match in matches)
{
string cleanMatch = cleanUP(match.Value);
urlList.Add(cleanMatch);
updateResults(theResults, "\"" + cleanMatch + "\",\n");
}
}
}
}
我认为错误在正则表达式中。
我想要实现的是拉一个网页,然后从该页面获取所有链接 - 将这些链接添加到列表中,然后为每个列表项获取下一页并重复该过程。