我目前正在处理多线程 Windows 服务时看到一些异常行为。我遇到的问题是某些对象在从不同线程访问时似乎正在重置。
让我用一些代码来演示(简化以解释问题)......
首先,我有一个基于另一个类中的方法启动线程的类(使用 Ninject 获取类),然后停止它们:
public class ContainerService : ServiceBase
{
private IEnumerable<IRunnableBatch> _services;
public void start()
{
_services = ServiceContainer.SvcContainer.Kernel.GetAll<IRunnableBatch>();
foreach (IRunnableBatch s in _services)
{
s.run();
}
}
public void stop()
{
foreach (IRunnableBatch s in _services)
{
s.stop();
}
}
}
现在,在 IRunnableBatch 类的 run() 方法中,我有这样的东西:
public class Batch : IRunnableBatch
{
//this class is used for starting and stopping threads as well as tracking
//threads to restart them should the stop
protected IWatchdog _watchdog;
... code ommitted for brevity but the watchdog class is injected by Ninject
in the constructor ...
public void run()
{
_watchdog.startThreads(this);
}
public void stop()
{
_watchdog.stopThreads();
}
}
下面是 Watchdog 类的代码:
public class Watchdog : IWatchdog
{
private ILog _logger;
private Dictionary<int, MethodInfo> _batches = new Dictionary<int, MethodInfo>();
private Dictionary<int, Thread> _threads = new Dictionary<int, Thread>();
private IRunnableBatch _service;
private Thread _watcher;
private Dictionary<int, ThreadFailure> _failureCounts = new Dictionary<int, ThreadFailure>();
private bool _runWatchdog = true;
#region IWatchdog Members
/**
* This function will scan an IRunnableService for the custom attribute
* "BatchAttribute" and use that to determine what methods to run when
* a batch needs to be launched
*/
public void startThreads(IRunnableBatch s)
{
_service = s;
//scan service for runnable methods
Type t = s.GetType();
MethodInfo[] methods = t.GetMethods();
foreach (MethodInfo m in methods)
{
object[] attrs = m.GetCustomAttributes(typeof(BatchAttribute), true);
if (attrs != null && attrs.Length >= 1)
{
BatchAttribute b = attrs[0] as BatchAttribute;
_batches.Add(b.Batch_Number, m);
}
}
//loop through and see if the batches need to run
foreach (KeyValuePair<int, MethodInfo> kvp in _batches)
{
startThread(kvp.Key, kvp.Value);
}
//check if the watcher thread is running. If not, start it
if (_watcher == null || !_watcher.IsAlive)
{
_watcher = new Thread(new ThreadStart(watch));
_watcher.Start();
_logger.Info("Watcher thread started.");
}
}
private void startThread(int key, MethodInfo method)
{
if (_service.shouldBatchRun(key))
{
Thread thread = new Thread(new ThreadStart(() => method.Invoke(_service, null)));
try
{
thread.Start();
_logger.Info("Batch " + key + " (" + method.Name + ") has been started.");
if (_threads.ContainsKey(key))
{
_threads[key] = thread;
}
else
{
_threads.Add(key, thread);
}
}
catch (Exception ex)
{
//mark this as the first problem starting the thread.
if (ex is System.Threading.ThreadStateException || ex is System.OutOfMemoryException)
{
_logger.Warn("Unable to start thread: " + method.Name, ex);
ThreadFailure tf = new ThreadFailure();
tf.Count = 1;
_failureCounts.Add(key, tf);
}
else { throw; }
}
}
}
public void stopThreads()
{
_logger.Info("stopThreads called");
//stop the watcher thread first
if (_watcher != null && _watcher.IsAlive)
{
_logger.Info("Stopping watcher thread.");
_runWatchdog = false;
_watcher.Join();
_logger.Info("Watcher thread stopped.");
}
int stoppedCount = 0;
_logger.Info("There are " + _threads.Count + " batches to stop.");
while (stoppedCount < _threads.Count)
{
ArrayList stopped = new ArrayList();
foreach (KeyValuePair<int, Thread> kvp in _threads)
{
if (kvp.Value.IsAlive)
{
_service.stopBatch(kvp.Key);
kvp.Value.Join(); //wait for thread to terminate
_logger.Info("Batch " + kvp.Key.ToString() + " stopped");
}
else
{
_logger.Info("Batch " + kvp.Key + " (" + _batches[kvp.Key].Name + ") has been stopped");
stoppedCount++;
stopped.Add(kvp.Key);
}
}
foreach (int n in stopped)
{
_threads.Remove(n);
}
}
}
public void watch()
{
int numIntervals = 15 * 12; //15 minutes in 5 second intervals
while (_runWatchdog)
{
//cycle through the batches and check the matched threads.
foreach (KeyValuePair<int, MethodInfo> kvp in _batches)
{
//if they are not running
if (!_threads[kvp.Key].IsAlive)
{
//mark the thread failure and then try again.
ThreadFailure tf;
if (_failureCounts.ContainsKey(kvp.Key))
{
tf = _failureCounts[kvp.Key];
}
else
{
tf = new ThreadFailure();
}
tf.Count++;
if (tf.Count >= 8)
{
//log an error as we've been trying to start this thread for 2 hours now
_logger.Error("Unable to start the thread: " + kvp.Value.Name + " ***** NOT TRYING AGAIN UNTIL SERVICE RESTART");
}
else
{
_logger.Warn("Thread (" + kvp.Value.Name + ") found stopped... RESTARTING");
startThread(kvp.Key, kvp.Value);
}
}
}
//sleep 15 minutes and repeat.
_logger.Info("*** Watcher sleeping for 15 minutes");
for (int i = 1; i <= numIntervals; i++)
{
if (!_runWatchdog)
break;
Thread.Sleep(5000); //sleep for 5 seconds
}
_logger.Info("*** Watcher woke up.");
}
_logger.Info("Watcher thread stopping.");
}
public void setLogger(ILog l)
{
_logger = l;
}
#endregion
}
因此,主程序调用 ContainerService.start(),后者调用 IRunnableBatch.run(),后者调用 IWatchdog.startThreads()。startThreads() 方法定位并启动它找到的所有线程,然后启动一个线程来监视其他线程,以防它们因某种原因死亡。然后函数退出一路备份主函数。
现在,服务只是等待服务管理器调用 OnStop(),但出于测试目的,我让主线程休眠 1 分钟,然后调用 ContainerService.stop()。
在所有这些解释之后,我现在开始讨论这个问题......哇!
当主线程调用 stop() 并且 stop() 方法调用 IRunnableBatch.stop() 时,如果我在那里有一个断点并检查 _watchdog 变量,我会看到所有相关的成员变量都设置回它们的初始值(没有线程,没有观察者线程,没有批处理,什么都没有......)。
任何人有任何想法为什么?