
 // Bundles one group of named sequences with the thread that processes the group
 // and the per-sequence results that processing produces.
 // NOTE(review): the braces and some statements appear to be missing from this
 // excerpt (see the notes on Run, _run and Join below) - confirm against the original.
 public class ThreadClassSeqGroups
        // Input for this group; iterated in _run, where the key is printed as the sequence name.
        public Dictionary<string, string> seqGroup;
        // Results filled in by _run, keyed by the same key as seqGroup.
        public Dictionary<string, List<SearchAlgorithm.CandidateStr>> completeModels;
        public Dictionary<string, List<SearchAlgorithm.CandidateStr>> partialModels;
        // Thread created by Run; also read by _run for its log line.
        private Thread nativeThread;

        // Stores the given sequence group and creates empty result dictionaries.
        public ThreadClassSeqGroups(Dictionary<string, string> seqs)
            seqGroup = seqs;
            completeModels  = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
            partialModels   = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();

        // Creates a thread whose entry point calls _run with the given arguments and
        // sets its priority to Highest.
        // NOTE(review): no Start() call is visible in this excerpt - confirm the thread is started.
        public void Run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
            List<ushort> secondarySeedOrder, double partialCutoff)
            nativeThread = new Thread(() => this._run(dsd, primarySeedSu, secondarySeedOrder, partialCutoff));
            nativeThread.Priority = ThreadPriority.Highest;

        // Thread body: walks every entry of seqGroup, logs progress, and records the
        // complete and partial models obtained for that entry under its key.
        public void _run(DescrStrDetail dsd, DescrStrDetail.SortUnit primarySeedSu,
            List<ushort> secondarySeedOrder, double partialCutoff)
            int groupSize = this.seqGroup.Count;
            int seqCount = 0;
            foreach (KeyValuePair<string, string> p in seqGroup)
                // Progress line: thread id, priority, running index / group size, sequence name.
                Console.WriteLine("ThreadID {0} (priority:{1}):\t#{2}/{3} SeqName: {4}",
                    nativeThread.ManagedThreadId, nativeThread.Priority.ToString(), ++seqCount, groupSize, p.Key);
                List<SearchAlgorithm.CandidateStr> tmpCompleteModels, tmpPartialModels;
                // NOTE(review): the start of the call these arguments belong to is missing
                // from this excerpt. The sequence is upper-cased and every 'T' is replaced
                // with 'U' before being passed; the two lists are returned via out parameters.
                        p.Value.ToUpper().Replace('T', 'U'), dsd, primarySeedSu, secondarySeedOrder, partialCutoff,
                        out tmpCompleteModels, out tmpPartialModels);
                // Add throws if the key is already present, so keys must be unique within the group.
                completeModels.Add(p.Key, tmpCompleteModels);
                partialModels.Add(p.Key, tmpPartialModels);

        // NOTE(review): the body is not visible in this excerpt; presumably it waits for
        // nativeThread to finish - confirm.
        public void Join()


// Driver: splits the input sequences into groups, hands each group to its own
// ThreadClassSeqGroups worker, then merges the workers' results.
// NOTE(review): braces appear to be missing from this excerpt, and dsd, primarySeedSu,
// secondarySeedOrder and _paramPartialCutoff are declared outside what is shown.
class Program
    // Group size passed to SplitSeqFasta.
    public static int _paramSeqGroupSize = 2000;
    static void Main(Dictionary<string, string> rawSeqs)
        // Split the whole rawSeqs (Dict<name, seq>) into several groups
        Dictionary<string, string>[] rawSeqGroups = SplitSeqFasta(rawSeqs, _paramSeqGroupSize);

        // Create a thread for each seqGroup and run
        // One worker (and therefore one thread) is created per group, so the thread
        // count equals rawSeqGroups.Length and is not capped here.
        var threadSeqGroups = new MultiThreading.ThreadClassSeqGroups[rawSeqGroups.Length];
        for (int i = 0; i < rawSeqGroups.Length; i++)
            threadSeqGroups[i] = new MultiThreading.ThreadClassSeqGroups(rawSeqGroups[i]);
            threadSeqGroups[i].Run(dsd, primarySeedSu, secondarySeedOrder, _paramPartialCutoff);

        // Merge results from threads after the thread finish
        // NOTE(review): no call to Join() is visible before the merge in this excerpt -
        // confirm each worker has finished before its dictionaries are read.
        var allCompleteModels   = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        var allPartialModels    = new Dictionary<string, List<SearchAlgorithm.CandidateStr>>();
        foreach (MultiThreading.ThreadClassSeqGroups t in threadSeqGroups)
            // Add throws on a duplicate key, so sequence names must be unique across groups.
            foreach (string name in t.completeModels.Keys)
                allCompleteModels.Add(name, t.completeModels[name]);
            foreach (string name in t.partialModels.Keys)
                allPartialModels.Add(name, t.partialModels[name]);



输入文件包含 2500 个字符串

_paramSeqGroupSize = 3000,主线程 + 1 个计算线程花费 200 秒

_paramSeqGroupSize = 400,主线程 + 7 个计算线程花费更多时间(我在运行 10 多分钟后将其杀死)。




3 回答 3




于 2012-07-27T18:30:05.857 回答


  1. 您提到了一个“输入文件”,但这在代码中没有清楚地显示 - 如果您的文件访问正在被线程化,这不会提高性能,因为文件访问将成为瓶颈。
  2. 创建比 CPU 内核更多的线程最终会降低性能(除非每个线程都被阻塞等待不同的资源)。在您的情况下,我建议总共 8 个线程太多了。
  3. 似乎有大量数据(内存)访问是通过您的 `DescrStrDetail` 类完成的,该类的实例由您 `Main` 方法中的变量 `dsd` 传递给每个子线程。但是,缺少此变量的声明,因此它的用法/实现是未知的。如果此变量具有阻止多个线程同时访问的锁,那么您的多个线程可能会在访问此数据时相互阻塞,从而进一步降低性能。
于 2012-07-27T15:38:13.750 回答

When threads run, they are given time on a specific processor. If there are more threads than processors, the system context-switches between threads so that every active thread gets some time to run. Context switching is really expensive. If you have more threads than processors, most of the CPU time can be taken up by context switching, which can make a single-threaded solution look faster than a multi-threaded one.

Your example starts an indeterminate number of threads. If SplitSeqFasta returns more entries than there are cores, you will create more threads than cores and introduce a lot of context switching.

I suggest you throttle the number of threads manually, or use something like the Task Parallel Library and its Parallel class to have the throttling done for you automatically.

于 2012-07-27T16:00:44.487 回答