3

我想抓取一个网页。问题是,这个网页有一个只能点击的加密链接。使用 webBrowser.Navigate 将不起作用。我设法模拟了一个点击动作,它打开了一个新窗口。现在我想要的是获取新窗口的 url。

private void Form1_Load(object sender, EventArgs e)
    {
        // Start the initial page load when the form loads; the Navigated
        // handler below reacts once the browser reports the navigation.
        const string startPage = @"http://www.downarchive.ws/software/downloaders/795011-easy-mp3-downloader-4536.html";
        webBrowser1.Navigate(startPage);
    }

    /// <summary>
    /// Scans the loaded document for the first anchor whose href contains the
    /// site's redirect prefix and simulates a click on it.
    /// </summary>
    /// <remarks>
    /// NOTE(review): Navigated is raised when loading has begun, so the anchor
    /// list may still be incomplete here; DocumentCompleted may be the more
    /// reliable hook - confirm.
    /// </remarks>
    private void webBrowser1_Navigated(object sender, WebBrowserNavigatedEventArgs e)
    {
        const string redirectPrefix = @"http://www.downarchive.ws/engine/go.php?url=";

        foreach (HtmlElement anchor in webBrowser1.Document.GetElementsByTagName("a"))
        {
            string target = anchor.GetAttribute("href");
            if (!target.Contains(redirectPrefix))
            {
                continue;
            }

            // Only the first matching link is clicked.
            anchor.InvokeMember("Click");
            return;
        }
    }

    /// <summary>
    /// Shows the URL of the browser that raised NewWindow.
    /// </summary>
    /// <remarks>
    /// The event arguments are a plain CancelEventArgs, so the only URL
    /// reachable here is the sender's current one, not the URL of the window
    /// that is about to open.
    /// </remarks>
    private void webBrowser1_NewWindow(object sender, CancelEventArgs e)
    {
        WebBrowser source = (WebBrowser)sender;
        string currentUrl = source.Url.ToString();
        MessageBox.Show(currentUrl);
    }
4

3 回答 3

2

1)您不需要调用点击。最好使用导航方法。当您调用 click 时,链接可能会在新窗口中打开,可能会执行一些额外的 javascript 等。

2)如果您需要在所有重定向后获取 url,则有 DocumentCompleted 事件:

// Declare the handler variable before assigning the lambda: the lambda
// unsubscribes itself, and referencing the variable inside its own
// initializer is rejected by the compiler (CS0165, unassigned local).
WebBrowserDocumentCompletedEventHandler onDocumentCompleted = null;
onDocumentCompleted = (sender, e) => {
    // Final URL after all redirects have been followed.
    Uri theUrlThatYouNeed = e.Url;

    // One-shot handler: detach so later navigations do not trigger it again.
    webBrowser1.DocumentCompleted -= onDocumentCompleted;
};
webBrowser1.DocumentCompleted += onDocumentCompleted;
webBrowser1.Navigate("your encrypted url");

3) 如果链接是在外部 IE 窗口中打开的,那么它就消失了——您无法控制外部浏览器并从中接收事件。有时,重定向可以打开新窗口。为了防止这种情况,您可以使用扩展的 WebBrowser 类:

namespace ExtendedWebBrowser {
/// <summary>
/// COM-imported dispatch event interface (IID 34A715A0-6587-11D0-924A-0020AFC7AC4D).
/// Only the NewWindow3 member is declared here.
/// </summary>
[ComImport, TypeLibType(TypeLibTypeFlags.FHidden),
InterfaceType(ComInterfaceType.InterfaceIsIDispatch),
Guid("34A715A0-6587-11D0-924A-0020AFC7AC4D")]
public interface DWebBrowserEvents2 {
    /// <summary>
    /// Callback for the NewWindow3 event (dispatch id 0x111); it receives the
    /// URL that is about to be opened in a new window and lets the receiver
    /// cancel that navigation through <paramref name="Cancel"/>.
    /// </summary>
    /// <param name="ppDisp">
    /// An interface pointer that, optionally, receives the IDispatch interface
    /// pointer of a new WebBrowser object or an InternetExplorer object.
    /// </param>
    /// <param name="Cancel">
    /// value that determines whether the current navigation should be canceled
    /// </param>
    /// <param name="dwFlags">
    /// The flags from the NWMF enumeration that pertain to the new window
    /// See http://msdn.microsoft.com/en-us/library/bb762518(VS.85).aspx.
    /// </param>
    /// <param name="bstrUrlContext">
    /// The URL of the page that is opening the new window.
    /// </param>
    /// <param name="bstrUrl">The URL that is opened in the new window.</param>
    [DispId(0x111)]
    void NewWindow3(
        [In, Out, MarshalAs(UnmanagedType.IDispatch)] ref object ppDisp,
        [In, Out] ref bool Cancel,
        [In] uint dwFlags,
        [In, MarshalAs(UnmanagedType.BStr)] string bstrUrlContext,
        [In, MarshalAs(UnmanagedType.BStr)] string bstrUrl);
}
/// <summary>
/// WebBrowser control that additionally exposes the ActiveX NewWindow3 event,
/// whose arguments carry the URL of the window about to be opened.
/// </summary>
public partial class WebBrowserEx : WebBrowser {
    private AxHost.ConnectionPointCookie cookie;
    private DWebBrowserEvent2Helper helper;

    /// <summary>
    /// Raised when the underlying control reports NewWindow3. Handlers can read
    /// the target URL and set Cancel to suppress the new window.
    /// </summary>
    [Browsable(true)]
    public event EventHandler<WebBrowserNewWindowEventArgs> NewWindow3;

    [PermissionSetAttribute(SecurityAction.LinkDemand, Name = "FullTrust")]
    public WebBrowserEx() {
    }

    /// <summary>
    /// Associates the underlying ActiveX control with a client that can
    /// handle control events including NewWindow3 event.
    /// </summary>
    [PermissionSetAttribute(SecurityAction.LinkDemand, Name = "FullTrust")]
    protected override void CreateSink() {
        base.CreateSink();

        helper = new DWebBrowserEvent2Helper(this);
        cookie = new AxHost.ConnectionPointCookie(
            this.ActiveXInstance, helper, typeof(DWebBrowserEvents2));
    }

    /// <summary>
    /// Releases the event-handling client attached in the CreateSink method
    /// from the underlying ActiveX control
    /// </summary>
    [PermissionSetAttribute(SecurityAction.LinkDemand, Name = "FullTrust")]
    protected override void DetachSink() {
        if (cookie != null) {
            cookie.Disconnect();
            cookie = null;
        }
        base.DetachSink();
    }

    /// <summary>
    ///  Raises the NewWindow3 event.
    /// </summary>
    /// <param name="e">Arguments describing the window about to be opened.</param>
    protected virtual void OnNewWindow3(WebBrowserNewWindowEventArgs e) {
        // Read the delegate once so a handler removed between the null check
        // and the call cannot cause a NullReferenceException.
        EventHandler<WebBrowserNewWindowEventArgs> handler = this.NewWindow3;
        if (handler != null) {
            handler(this, e);
        }
    }

    /// <summary>
    /// Event sink handed to the ActiveX control; forwards NewWindow3 callbacks
    /// to the owning <see cref="WebBrowserEx"/>.
    /// </summary>
    private class DWebBrowserEvent2Helper : StandardOleMarshalObject, DWebBrowserEvents2 {
        private readonly WebBrowserEx parent;

        public DWebBrowserEvent2Helper(WebBrowserEx parent) {
            this.parent = parent;
        }

        /// <summary>
        /// Raise the NewWindow3 event.
        /// If an instance of this helper is associated with the underlying
        /// ActiveX control, this method will be called when the NewWindow3
        /// event was fired in the ActiveX control.
        /// </summary>
        public void NewWindow3(ref object ppDisp, ref bool Cancel, uint dwFlags,
            string bstrUrlContext, string bstrUrl) {
            var e = new WebBrowserNewWindowEventArgs(bstrUrl, Cancel);
            this.parent.OnNewWindow3(e);

            // Propagate the handlers' decision back to the control.
            Cancel = e.Cancel;
        }
    }
}
/// <summary>
/// Event data for a new-window request: the URL being opened and a flag that
/// handlers can set to cancel the request.
/// </summary>
public class WebBrowserNewWindowEventArgs : EventArgs {
    /// <summary>Creates the event data with an initial URL and cancel state.</summary>
    /// <param name="url">The URL that is opened in the new window.</param>
    /// <param name="cancel">Initial value of <see cref="Cancel"/>.</param>
    public WebBrowserNewWindowEventArgs(string url, bool cancel) {
        Url = url;
        Cancel = cancel;
    }

    /// <summary>The URL that is opened in the new window.</summary>
    public string Url { get; set; }

    /// <summary>Whether the new-window request should be canceled.</summary>
    public bool Cancel { get; set; }
}
}

// Handles WebBrowserEx.NewWindow3: suppresses the new window and loads its
// URL in the browser that raised the event instead.
void WebBrowser_NewWindow(object sender, WebBrowserNewWindowEventArgs e) {
  if (string.IsNullOrEmpty(e.Url)) {
    // No target URL to redirect to; leave the request untouched.
    return;
  }

  //Prevent new window
  e.Cancel = true;

  // Navigate to url from new window. Going through sender (the WebBrowserEx
  // that raised the event) keeps this handler usable from a Form, where an
  // unqualified Navigate call would not resolve.
  ((WebBrowser)sender).Navigate(e.Url);
}

因此,如果您用这个改进版本(WebBrowserEx)替换原来的 WebBrowser 控件,就能够阻止新窗口的打开,并改为在当前控件中导航到该 URL。

于 2013-05-19T19:20:21.610 回答
1

我知道这个问题很老了,但我这样解决了:添加新引用,在 COM 中选择 Microsoft Internet Controls 并在代码中,在单击打开新窗口之前添加以下内容:

// Get the ActiveX object behind the WinForms control and subscribe to its
// NewWindow event, whose callback receives the target URL.
var legacyBrowser = (SHDocVw.WebBrowser_V1)webBrowser1.ActiveXInstance;
legacyBrowser.NewWindow += axBrowser_NewWindow;

然后添加以下方法:

// NewWindow callback of the ActiveX browser: marks the request as processed
// and loads the requested URL in webBrowser1.
void axBrowser_NewWindow(string URL, int Flags, string TargetFrameName, ref object PostData, string Headers, ref bool Processed)
{
    // Flag the request as handled before navigating (per the surrounding
    // answer, this is what keeps the separate window from opening).
    Processed = true;

    webBrowser1.Navigate(URL);
}
于 2020-06-09T17:16:45.943 回答
0

您可以使用文档事件来获取鼠标下方(即被单击)的元素,并将其存储为 curElement;然后在 WebBrowser 的 NewWindow 事件中,读取 curElement 的 href 并导航到它。

// After the document has loaded: track mouse movement over the page so the
// element under the cursor is known when a new window is requested.
// NOTE(review): presumably run from a DocumentCompleted handler so that
// Document is available - confirm against the caller.
webBrowser1.Document.MouseMove += Document_MouseMove;


// Element currently under the mouse cursor; refreshed on every mouse move.
HtmlElement curElement;

// Document MouseMove handler: records the element at the cursor position.
void Document_MouseMove(object sender, HtmlElementEventArgs e)
{
    HtmlDocument document = webBrowser1.Document;
    curElement = document.GetElementFromPoint(e.ClientMousePosition);
}
// NewWindow handler: cancels the new window and resolves the link that was
// under the mouse when the window was requested.
void webBrowser1_NewWindow(object sender, CancelEventArgs e)
{
    e.Cancel = true;

    // The element under the cursor is often a child of the link (for example
    // an <img> or <span> inside <a>), so walk up to the enclosing anchor
    // instead of requiring the hovered element itself to be the <a>.
    HtmlElement anchor = curElement;
    while (anchor != null &&
           !string.Equals(anchor.TagName, "A", StringComparison.OrdinalIgnoreCase))
    {
        anchor = anchor.Parent;
    }

    if (anchor != null)
    {
        string href = anchor.GetAttribute("href");
        // do whatever
    }
}
于 2015-01-12T19:25:52.873 回答