由于缺乏更好的选择,我正在构建这个网络爬虫来从他们的搜索中收集 Steam 配置文件 ID。
它应该作为服务运行,但我无法使其工作。如果我复制代码并将其作为控制台应用程序运行,它会完美运行。
我是 Windows 服务编程和使用并行任务的新手,所以如果还有其他完全关闭或效率低下的东西,请赐教,但手头的问题更重要。
我尝试使用 eventviewer 对其进行调试,但我对这些东西也很陌生,我发现它非常乏味并且不知道要寻找什么。
下面是在控制台应用程序中工作的代码,但不是作为 Windows 服务。为什么它只能在控制台应用程序中工作?
public partial class SSIDGatherer : ServiceBase
{
EventLog log;
Thread t;
public SSIDGatherer()
{
InitializeComponent();
if (!EventLog.SourceExists("SSIDGatherer"))
{
EventLog.CreateEventSource("SSIDGatherer", "SteamStats");
}
log = new EventLog();
log.Source = "SSIDGatherer";
log.Log = "SteamStats";
}
protected override void OnStart(string[] args)
{
ServicePointManager.UseNagleAlgorithm = true;
ServicePointManager.Expect100Continue = true;
ServicePointManager.CheckCertificateRevocationList = true;
ServicePointManager.DefaultConnectionLimit = 50;
ThreadStart ts = new ThreadStart(Treading);
try
{
t = new Thread(ts);
t.Start();
}
catch (AggregateException agex)
{
string message = "";
foreach (Exception ex in agex.InnerExceptions)
{
message += string.Format("Exception: {0} \n\nStack: {1}", ex.Message, ex.StackTrace);
}
log.WriteEntry(message, EventLogEntryType.Error);
}
}
protected override void OnStop()
{
t.Abort();
}
private void Treading()
{
while (true)
{
//steam custom url allowed characters a-z 0-9 _ -
string[] characters = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-", "_" };
//for every combination you can do with 2 letters (which is minimum required for a player search)
Parallel.ForEach(characters, characterX =>
{
Parallel.ForEach(characters, characterY =>
{
CrawlSteamIDs("http://steamcommunity.com/actions/Search?T=Account", characterX + characterY, 1);
});
});
}
}
private void CrawlSteamIDs(string baseUrl, string keyword, int page)
{
HtmlDocument doc = new HtmlDocument();
try
{
string url = baseUrl + "&K=" + keyword + "&p=" + page;
//Some steam urls redirect to https for no reason, as the data is also available on http.
//Could be an error from steam, or an typo from me. Test it out later. For now, crudely replacing will have to do.
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url.Replace("https:", "http:"));
request.UserAgent = "Steam-Stats Web Crawler";
request.Timeout = 20000;
using (WebResponse response = request.GetResponse())
{
doc.Load(response.GetResponseStream());
}
}
catch (Exception ex)
{
log.WriteEntry("Timeout Error\nURL: " + baseUrl + "&K=" + keyword + "&p=" + page, EventLogEntryType.Error);
}
if (doc.DocumentNode.HasChildNodes)
{
//if the search returns any results otherwise just skip trying scanning
if (doc.DocumentNode.SelectSingleNode("//div[@id='resultsMsg']") == null)
{
Parallel.ForEach(doc.DocumentNode.SelectNodes("//a[@class='linkTitle']"), link =>
{
string href = link.Attributes["href"].Value;
//get profiles with custom name enabled
if (href.StartsWith("http://steamcommunity.com/id/"))
{
string steamID = href.Substring(29);
DAL db = new DAL("DB");
db.Addparameter("@SteamID", steamID, SqlDbType.NVarChar);
db.ExecuteNonQuery("QueueGathererSteamID", true);
}
//get profiles using the steam64ID
if (href.StartsWith("http://steamcommunity.com/profiles/"))
{
long steam64ID = Convert.ToInt64(href.Substring(35));
DAL db = new DAL("DB");
db.Addparameter("@Steam64ID", steam64ID, SqlDbType.NVarChar);
db.ExecuteNonQuery("QueueGathererSteam64ID", true);
}
});
//crawl next page if available
HtmlNode pagebtn = doc.DocumentNode.SelectSingleNode("//a[@class='pagebtn' and .='>']");
if (pagebtn != null)
{
CrawlSteamIDs(baseUrl, keyword, page + 1);
}
}
}
else
{
log.WriteEntry("Error Loading HTML Page: " + doc.ToString() + "\nURL: " + baseUrl + "&K=" + keyword + "&p=" + page, EventLogEntryType.Error);
//error loading page, try next page instead
CrawlSteamIDs(baseUrl, keyword, page + 1);
}
}
}
来自 eventviwer 的堆栈。源被列为 .NET 运行时
应用程序:SSIDGatherer.exe 框架版本:v4.0.30319 描述:进程因未处理的异常而终止。异常信息:System.AggregateException 堆栈:在 System.Threading.Tasks.Parallel.ForWorker[[System._Canon , mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]](Int32, Int32, System.Threading .Tasks.ParallelOptions, System.Action
1<Int32>, System.Action
2, System.Func4<Int32,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func
1, System.Action1<System.__Canon>) at System.Threading.Tasks.Parallel.ForEachWorker[[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[System.__Canon, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]](System.__Canon[], System.Threading.Tasks.ParallelOptions, System.Action
1, System.Action2<System.__Canon,System.Threading.Tasks.ParallelLoopState>, System.Action
3, System.Func4<System.__Canon,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func
5, System.Func1<System.__Canon>, System.Action
1) 在 System.Threading.Tasks.Parallel.ForEachWorker[[System. _Canon,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089],[System._佳能,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089]](System.Collections.Generic.IEnumerable1<System.__Canon>, System.Threading.Tasks.ParallelOptions, System.Action
1,System.Action2<System.__Canon,System.Threading.Tasks.ParallelLoopState>, System.Action
3,System.Func4<System.__Canon,System.Threading.Tasks.ParallelLoopState,System.__Canon,System.__Canon>, System.Func
5,System.Func1<System.__Canon>, System.Action
1)在 System.Threading.Tasks .Parallel.ForEach[[系统。_Canon,mscorlib,版本=4.0.0.0,文化=中性,PublicKeyToken=b77a5c561934e089]](System.Collections.Generic.IEnumerable1<System.__Canon>, System.Action
1) 在 SSIDGatherer.SSIDGatherer.Treading() 在 System.Threading.ThreadHelper.ThreadStart_Context(System.Object) 在 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)在 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object) 在 System.Threading.ThreadHelper.ThreadStart()