0

我正在尝试使用从linkedin中提取的一些数据填充网格,我只是想让它为我自己的学习曲线工作,但是如果我删除了这条线

MessageBox.Show("asdfasdfasdf")

列表“消息”只有 1 项,如果我包含上面的行,它会达到预期的效果,我会收到 15 条消息

有人可以解释吗?

public void extract_messages_received(object sender, RoutedEventArgs e)
{
    triggered = false;
    System.Windows.Forms.WebBrowser browser = new System.Windows.Forms.WebBrowser();
    browser.Navigate(new Uri(@"http://www.linkedin.com/inbox/messages/received"));
    browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(browser_DocumentCompleted);
}

private void LoadMessages(string url)
{
    txtOutput.Text = @"http://www.linkedin.com" + url.Substring(6, url.Length - 6);
    if (!urls.Contains(url))
    {
        urls.Add(url);
        WebBrowser browser = new WebBrowser();
        browser.Navigate(new Uri(txtOutput.Text);

        loaded_message = false;
        browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(ReadMessages);
    }
}

private void ReadMessages(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    if (loaded_message == false)
    {        
        string url = ((WebBrowser)sender).Url.ToString();
        int loc1 = url.IndexOf("itemID") + 7;
        int loc2 = url.IndexOf("&", loc1);
        IEnumerable<string> name = null;
        IEnumerable<string> odate = null;
        IEnumerable<string> photo = null;
        IEnumerable<string> subject = null;
        IEnumerable<string> headline = null;
        string body = "";
        string id = url.Substring(loc1, loc2 - loc1);
        //System.Windows.MessageBox.Show("READ");
        foreach (HtmlElement element in ((WebBrowser)sender).Document.GetElementsByTagName("div"))
        {
            if (element.GetAttribute("classname").Equals("inbox-item-body"))
            {
                body = element.InnerText;
            }
            if (element.GetAttribute("classname").Equals("inbox-item-header"))
            {
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(element.InnerHtml);
                name = from foo in doc.DocumentNode.SelectNodes("//a[@class='fn']") select foo.InnerText;
                odate = from foo in doc.DocumentNode.SelectNodes("//p[@class='date']") select foo.InnerText;
                photo = from foo in doc.DocumentNode.SelectNodes("//img[@class='photo']") select foo.Attributes["src"].Value;
                subject = from foo in doc.DocumentNode.SelectNodes("//h3") select foo.InnerText;
                headline = from foo in doc.DocumentNode.SelectNodes("//span[@class='headline']") select foo.InnerText;
            }
        }

        // ****
        MessageBox.Show("asdfasdfasdf");
        // ****

        messages.Add(new Messages()
        {
            ID = id,
            Subject = subject.First().ToString(),
            Headline = headline.First().ToString(),
            Sender = name.First().ToString(),
            Photo = photo.First().ToString(),
            SendDate = odate.First().ToString(),
            Body = body
        });

           // dataMessages.ItemsSource = messages;
    }
    loaded_message = true;
}

void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    if (!triggered)
    {
        triggered = true;
        System.Windows.Forms.WebBrowser web = sender as System.Windows.Forms.WebBrowser;
        foreach (HtmlElement element in web.Document.GetElementsByTagName("ol"))
        {
            if (element.GetAttribute("classname").Contains("inbox-list "))
            {
                WebBrowser browser = new WebBrowser();
                browser.Navigate("about:blank");
                browser.Document.Write(element.InnerHtml);
                HtmlElementCollection hrefTags = null;
                hrefTags = browser.Document.GetElementsByTagName("a");
                foreach (HtmlElement a in hrefTags)
                {
                    if (a.OuterHtml.Contains("displayMBox"))
                    {
                        LoadMessages(a.GetAttribute("href"));
                    }
                }
            }
        }
    }       
}
4

1 回答 1

0

这是一个时间问题。

当您在那里有消息框时,直到您关闭消息框后才会loaded_message设置为true,因此其他事件也在处理直到消息框,loaded_message在您关闭第一条消息之前,它们都没有设置为 true盒子。

如果您足够快地关闭消息框,您可能会在 1 和 15 之间看到一些数字。

让我们举一个更简单的例子:

    private void Form1_Load(object sender, EventArgs e)
    {

        for (int i = 0; i < 5; i++)
        {
            WebBrowser wb = new WebBrowser();
            wb.DocumentCompleted += wb_DocumentCompleted;
            wb.Navigate("http://www.stackoverflow.com");
        }
    }

    bool shown = false;
    void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        if (!shown)
        {
            Console.WriteLine(shown);
            MessageBox.Show(shown.ToString());
            shown = true;
        }
    }

现在,如果您查看控制台,您会看到在false显示第一个消息框之前出现了一些。当我关闭消息框时,我会看到另外 4 个消息框,因为它们已经排队等待显示,然后shown才设置为 true。如果我注释掉消息框,那么我只会看到一个消息框和一个false在控制台中。

现在,问题变成了,为什么要添加并且需要检查loaded_message布尔变量。

我的猜测是您只想加载每条消息一次。如果是这种情况,请在字典中跟踪每个 URL,并为每个 URL 维护一个布尔值:

    Dictionary<string, bool> loadedUrls = new Dictionary<string, bool>();
    private void Form1_Load(object sender, EventArgs e)
    {

        for (int i = 0; i < 5; i++)
        {
            WebBrowser wb = new WebBrowser();
            wb.DocumentCompleted += wb_DocumentCompleted;
            string url = "http://stackoverflow.com/" + i;

            loadedUrls.Add(url, false);
            wb.Navigate(url);
        }
    }

    bool shown = false;
    void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {

        if (loadedUrls.ContainsKey(e.Url.OriginalString) && loadedUrls[e.Url.OriginalString] == false)
        {
            loadedUrls[e.Url.OriginalString] = true;
            Console.WriteLine(shown);
            shown = true;
        }
    }

我离开shown那里是为了证明这种新方法现在适用于文档完成事件中的每次传递。您的输出窗口应该有 afalse后跟 4 true

于 2013-01-19T19:04:02.053 回答