WCF (.NET 4.0) 是否提供检索网页 HTML 的功能?如果没有,在 .NET 4.0 中最好的方法是什么?
问问题
90 次
2 回答
1
如果您尝试获取 HTML 内容(用于缓存等),Eric J 是对的。如果您打算将网页的某些部分提取为数据,您可能需要查看 HTML Agility Pack。 http://htmlagilitypack.codeplex.com/wikipage?title=示例
于 2012-08-24T17:36:58.630 回答
1
您可以使用WebClient检索 web paqe 的 HTML
using (WebClient web = new WebClient())
{
var data = web.DownloadData(myURL);
}
WebClient 存在一些限制,例如无法设置下载时间和不一致的进度事件。我编写了自己的子类来改进它。这是有用的代码。请注意,我正在处理的代码中有一个错误(请参阅WebClient Subclass Disposed during Event Handler, Result is ObjectDisposedException)。该错误的一个简单修复方法是在我的问题中提到的行周围放置一个 try/catch,但我试图了解该问题中的核心问题。
public class MyWebClient : WebClient, IDisposable
{
public int Timeout { get; set; }
public int TimeUntilFirstByte { get; set; }
public int TimeBetweenProgressChanges { get; set; }
public long PreviousBytesReceived { get; private set; }
public long BytesNotNotified { get; private set; }
public string Error { get; private set; }
public bool HasError { get { return Error != null; } }
private bool firstByteReceived = false;
private bool success = true;
private bool cancelDueToError = false;
private EventWaitHandle asyncWait = new ManualResetEvent(false);
private Timer abortTimer = null;
private bool isDisposed = false;
const long ONE_MB = 1024 * 1024;
public delegate void PerMbHandler(long totalMb);
public event PerMbHandler NotifyMegabyteIncrement;
public MyWebClient(int timeout = 60000, int timeUntilFirstByte = 30000, int timeBetweenProgressChanges = 15000)
{
this.Timeout = timeout;
this.TimeUntilFirstByte = timeUntilFirstByte;
this.TimeBetweenProgressChanges = timeBetweenProgressChanges;
this.DownloadFileCompleted += new System.ComponentModel.AsyncCompletedEventHandler(MyWebClient_DownloadFileCompleted);
this.DownloadProgressChanged += new DownloadProgressChangedEventHandler(MyWebClient_DownloadProgressChanged);
abortTimer = new Timer(AbortDownload, null, TimeUntilFirstByte, System.Threading.Timeout.Infinite);
}
protected void OnNotifyMegabyteIncrement(long totalMb)
{
if (NotifyMegabyteIncrement != null) NotifyMegabyteIncrement(totalMb);
}
void AbortDownload(object state)
{
cancelDueToError = true;
this.CancelAsync();
success = false;
Error = firstByteReceived ? "Download aborted due to >" + TimeBetweenProgressChanges + "ms between progress change updates." : "No data was received in " + TimeUntilFirstByte + "ms";
asyncWait.Set();
}
void MyWebClient_DownloadProgressChanged(object sender, DownloadProgressChangedEventArgs e)
{
if (cancelDueToError || isDisposed) return;
long additionalBytesReceived = e.BytesReceived - PreviousBytesReceived;
PreviousBytesReceived = e.BytesReceived;
BytesNotNotified += additionalBytesReceived;
if (BytesNotNotified > ONE_MB)
{
OnNotifyMegabyteIncrement(e.BytesReceived);
BytesNotNotified = 0;
}
firstByteReceived = true;
if (!isDisposed) abortTimer.Change(TimeBetweenProgressChanges, System.Threading.Timeout.Infinite);
}
public bool DownloadFileWithEvents(string url, string outputPath)
{
asyncWait.Reset();
Uri uri = new Uri(url);
this.DownloadFileAsync(uri, outputPath);
asyncWait.WaitOne();
return success;
}
void MyWebClient_DownloadFileCompleted(object sender, System.ComponentModel.AsyncCompletedEventArgs e)
{
if (cancelDueToError || isDisposed) return;
asyncWait.Set();
}
protected override WebRequest GetWebRequest(Uri address)
{
var result = base.GetWebRequest(address);
result.Timeout = this.Timeout;
return result;
}
void IDisposable.Dispose()
{
isDisposed = true;
if (asyncWait != null) asyncWait.Dispose();
if (abortTimer != null) abortTimer.Dispose();
base.Dispose();
}
}
于 2012-08-24T17:15:41.873 回答