编码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using System.Net;
using System.Web;
using System.Threading;
using DannyGeneral;
using GatherLinks;
namespace GatherLinks
{
class RetrieveWebContent
{
HtmlAgilityPack.HtmlDocument doc;
string imgg;
int images;
public RetrieveWebContent()
{
images = 0;
}
public List<string> retrieveImages(string address)
{
try
{
doc = new HtmlAgilityPack.HtmlDocument();
System.Net.WebClient wc = new System.Net.WebClient();
List<string> imgList = new List<string>();
doc.Load(wc.OpenRead(address));
HtmlNodeCollection imgs = doc.DocumentNode.SelectNodes("//img[@src]");
if (imgs == null) return new List<string>();
foreach (HtmlNode img in imgs)
{
if (img.Attributes["src"] == null)
continue;
HtmlAttribute src = img.Attributes["src"];
imgList.Add(src.Value);
if (src.Value.StartsWith("http") || src.Value.StartsWith("https") || src.Value.StartsWith("www"))
{
images++;
string[] arr = src.Value.Split('/');
imgg = arr[arr.Length - 1];
wc.DownloadFile(src.Value, @"d:\MyImages\" + imgg);
}
}
return imgList;
}
catch
{
Logger.Write("There Was Problem Downloading The Image: " + imgg);
return null;
}
}
}
}
例如给出此异常的链接:
http://vanessawest.tripod.com/bundybowman.jpg
经过几次迭代后,它进入了 foreach 循环。
现在,如果链接来自另一个站点,例如:
www.walla.co.il
因此,它进入 foreach 循环并获取所有图像是没有问题的。
这是链接的完整异常消息:
http://vanessawest.tripod.com/bundybowman.jpg
System.Net.WebException was caught
HResult=-2146233079
Message=An exception occurred during a WebClient request.
Source=System
StackTrace:
at System.Net.WebClient.DownloadFile(Uri address, String fileName)
at System.Net.WebClient.DownloadFile(String address, String fileName)
at GatherLinks.RetrieveWebContent.retrieveImages(String address) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\RetrieveWebContent.cs:line 55
InnerException: System.ArgumentException
HResult=-2147024809
Message=Illegal characters in path.
Source=mscorlib
StackTrace:
at System.IO.Path.CheckInvalidPathChars(String path, Boolean checkAdditional)
at System.IO.FileStream.Init(String path, FileMode mode, FileAccess access, Int32 rights, Boolean useRights, FileShare share, Int32 bufferSize, FileOptions options, SECURITY_ATTRIBUTES secAttrs, String msgPath, Boolean bFromProxy, Boolean useLongPath, Boolean checkHost)
at System.IO.FileStream..ctor(String path, FileMode mode, FileAccess access)
at System.Net.WebClient.DownloadFile(Uri address, String fileName)
InnerException:
我不知道为什么在 walla link 上它的工作没有问题,而在三脚架上它的异常。