1

由于缺乏更好的选择,我正在构建这个网络爬虫来从他们的搜索中收集 Steam 配置文件 ID。

它应该作为服务运行,但我无法使其工作。如果我复制代码并将其作为控制台应用程序运行,它会完美运行。

我是 Windows 服务编程和使用并行任务的新手,所以如果还有其他完全关闭或效率低下的东西,请赐教,但手头的问题更重要。

我尝试使用 eventviewer 对其进行调试,但我对这些东西也很陌生,我发现它非常乏味并且不知道要寻找什么。

下面是在控制台应用程序中工作的代码,但不是作为 Windows 服务。为什么它只能在控制台应用程序中工作?

public partial class SSIDGatherer : ServiceBase
{
    EventLog log;
    Thread t;

    public SSIDGatherer()
    {
        InitializeComponent();

        if (!EventLog.SourceExists("SSIDGatherer"))
        {
            EventLog.CreateEventSource("SSIDGatherer", "SteamStats");
        }

        log = new EventLog();
        log.Source = "SSIDGatherer";
        log.Log = "SteamStats";
    }

    protected override void OnStart(string[] args)
    {
        ServicePointManager.UseNagleAlgorithm = true;
        ServicePointManager.Expect100Continue = true;
        ServicePointManager.CheckCertificateRevocationList = true;
        ServicePointManager.DefaultConnectionLimit = 50;

        ThreadStart ts = new ThreadStart(Treading);

        try
        {
            t = new Thread(ts);
            t.Start();
        }
        catch (AggregateException agex)
        {
            string message = "";
            foreach (Exception ex in agex.InnerExceptions)
            {
                message += string.Format("Exception: {0} \n\nStack: {1}", ex.Message, ex.StackTrace);
            }

            log.WriteEntry(message, EventLogEntryType.Error);
        }
    }

    protected override void OnStop()
    {
        t.Abort();
    }

    private void Treading()
    {
        while (true)
        {
            //steam custom url allowed characters a-z 0-9 _ -
            string[] characters = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-", "_" };

            //for every combination you can do with 2 letters (which is minimum required for a player search)
            Parallel.ForEach(characters, characterX =>
            {
                Parallel.ForEach(characters, characterY =>
                            {
                                CrawlSteamIDs("http://steamcommunity.com/actions/Search?T=Account", characterX + characterY, 1);
                            });
            });
        }
    }

    private void CrawlSteamIDs(string baseUrl, string keyword, int page)
    {
        HtmlDocument doc = new HtmlDocument();

        try
        {
            string url = baseUrl + "&K=" + keyword + "&p=" + page;

            //Some steam urls redirect to https for no reason, as the data is also available on http.
            //Could be an error from steam, or an typo from me. Test it out later. For now, crudely replacing will have to do.
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url.Replace("https:", "http:"));
            request.UserAgent = "Steam-Stats Web Crawler";
            request.Timeout = 20000;

            using (WebResponse response = request.GetResponse())
            {
                doc.Load(response.GetResponseStream());
            }
        }
        catch (Exception ex)
        {
            log.WriteEntry("Timeout Error\nURL: " + baseUrl + "&K=" + keyword + "&p=" + page, EventLogEntryType.Error);
        }

        if (doc.DocumentNode.HasChildNodes)
        {
            //if the search returns any results otherwise just skip trying scanning
            if (doc.DocumentNode.SelectSingleNode("//div[@id='resultsMsg']") == null)
            {
                    Parallel.ForEach(doc.DocumentNode.SelectNodes("//a[@class='linkTitle']"), link =>
                                {
                                    string href = link.Attributes["href"].Value;

                                    //get profiles with custom name enabled
                                    if (href.StartsWith("http://steamcommunity.com/id/"))
                                    {
                                        string steamID = href.Substring(29);

                                        DAL db = new DAL("DB");
                                        db.Addparameter("@SteamID", steamID, SqlDbType.NVarChar);
                                        db.ExecuteNonQuery("QueueGathererSteamID", true);
                                    }

                                    //get profiles using the steam64ID
                                    if (href.StartsWith("http://steamcommunity.com/profiles/"))
                                    {
                                        long steam64ID = Convert.ToInt64(href.Substring(35));

                                        DAL db = new DAL("DB");
                                        db.Addparameter("@Steam64ID", steam64ID, SqlDbType.NVarChar);
                                        db.ExecuteNonQuery("QueueGathererSteam64ID", true);
                                    }
                                });

                //crawl next page if available
                HtmlNode pagebtn = doc.DocumentNode.SelectSingleNode("//a[@class='pagebtn' and .='>']");
                if (pagebtn != null)
                {
                    CrawlSteamIDs(baseUrl, keyword, page + 1);
                }
            }
        }
        else
        {
            log.WriteEntry("Error Loading HTML Page: " + doc.ToString() + "\nURL: " + baseUrl + "&K=" + keyword + "&p=" + page, EventLogEntryType.Error);

            //error loading page, try next page instead
            CrawlSteamIDs(baseUrl, keyword, page + 1);
        }
    }
}

来自 eventviwer 的堆栈。源被列为 .NET 运行时

应用程序:SSIDGatherer.exe 框架版本:v4.0.30319 描述:进程因未处理的异常而终止。异常信息:System.AggregateException 堆栈:在 System.Threading.Tasks.Parallel.ForWorker[[System._Canon , mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]](Int32, Int32, System.Threading .Tasks.ParallelOptions, System.Action 1<Int32>, System.Action2, System.Func 4<Int32,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func1, System.Action 1<System.__Canon>) at System.Threading.Tasks.Parallel.ForEachWorker[[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]](System.__Canon[], System.Threading.Tasks.ParallelOptions, System.Action1, System.Action 2<System.__Canon,System.Threading.Tasks.ParallelLoopState>, System.Action3, System.Func 4<System.__Canon,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func5, System.Func 1<System.__Canon>, System.Action1) 在 System.Threading.Tasks.Parallel.ForEachWorker[[System. _Canon,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089],[System._佳能,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089]](System.Collections.Generic.IEnumerable 1<System.__Canon>, System.Threading.Tasks.ParallelOptions, System.Action1,System.Action 2<System.__Canon,System.Threading.Tasks.ParallelLoopState>, System.Action3,System.Func 4<System.__Canon,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func5,System.Func 1<System.__Canon>, System.Action1)在 System.Threading.Tasks .Parallel.ForEach[[系统。_Canon,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089]](System.Collections.Generic.IEnumerable1<System.__Canon>, System.Action1) 在 SSIDGatherer.SSIDGatherer.Treading() 在 System.Threading.ThreadHelper.ThreadStart_Context(System.Object) 在 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)在 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object) 在 System.Threading.ThreadHelper.ThreadStart()

4

1 回答 1

0

尝试将Debugger.Launch放在服务的 OnStart 中,这样您就可以单步执行代码并找出问题所在。

于 2012-05-28T18:23:20.637 回答